### R examples for nonparametric statistics
### Source: Hollander M, Wolfe D, Nonparametric Statistical Methods, 2nd edition, Wiley
### Course: APMA168 - Nonparametric Statistics, Brown University
### Instructor: Konstantinos Spiliopoulos
## The output from R is shown in comments, so that you won't get errors after copy-pasting.
#####################################################################################################################
#####################################################################################################################
# Chapter 2: Binomial probabilities and tests

######################### An example of a binomial probability
2*(1-pbinom(6, 10, .5))
#[1] 0.34375
2*(pbinom(3, 10, .5))
#[1] 0.34375

######################### A binomial test
binom.test(7, 10, .5)
#        Exact binomial test
#data:  7 and 10
#number of successes = 7, number of trials = 10, p-value = 0.3438
#alternative hypothesis: true probability of success is not equal to 0.5
#95 percent confidence interval:
# 0.3475471 0.9332605
#sample estimates:
#probability of success
#                   0.7

### If you need help with any command, for example with "pbinom", you can use
help("pbinom")

######################################################################################################################
######################################################################################################################
# Chapter 3: The One-Sample Location Problem, sign tests, Wilcoxon test

### Paired replicates analysis by way of signed ranks, Sections 3.1-3.3: the Wilcoxon signed rank test.
### Data are from Table 3.1.
X<-c(1.83, 0.5, 1.62, 2.48, 1.68, 1.88, 1.55, 3.06, 1.3)
Y<-c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
Z<-Y-X
help("wilcox.test")
wilcox.test(Z, alternative = c("two.sided"), mu=0, conf.int=TRUE, exact=TRUE, conf.level = 1-0.049)
#        Wilcoxon signed rank test
#data:  Z
#V = 5, p-value = 0.03906
#alternative hypothesis: true location is not equal to 0
#95.1 percent confidence interval:
# -0.786 -0.010
#sample estimates:
#(pseudo)median
#         -0.46

################ Large sample approximation
n<-length(Z)
R <- rank(abs(Z))
R
#[1] 8 3 9 4 7 6 5 2 1
T <- sum(R[Z > 0])
T
#[1] 5
T1 <- (T - (n*(n+1)/4))/(sqrt(n*(n+1)*(2*n+1)/24))
pnorm(T1)    # one-sided test
#[1] 0.0192
## smallest significance level at which we can reject Ho in favor of the one-sided H1
2*pnorm(T1)  # two-sided test
#[1] 0.0382
## smallest significance level at which we can reject Ho in favor of the two-sided H1

################# Paired replicates analysis by way of signs, Sections 3.4-3.7
######### --- Data are from Table 3.5 ---
XDark<-c(5.8, 13.5, 26.1, 7.4, 7.6, 23, 10.7, 9.1, 19.3, 26.3, 17.5, 17.9, 18.3, 14.2, 55.2, 15.4, 30, 21.3, 26.8, 8.1, 24.3, 21.3, 18.2, 22.5, 31.1)
Yillumination<-c(5,21,73,25,3,77,59,13,36,46,9,25,59,38,70,36,55,46,25,30,29,46,71,31,33)
Z<-Yillumination-XDark
Z
# [1] -0.8  7.5 46.9 17.6 -4.6 54.0 48.3  3.9 16.7 19.7 -8.5  7.1 40.7 23.8 14.8 20.6 25.0 24.7 -1.8 21.9  4.7 24.7 52.8  8.5  1.9
n<-length(Z)
B <- length(Z[Z>0.0])
B
# [1] 21
binom.test(B, n, 0.5, alternative=c("greater"), conf.level=1-0.0216)
#        Exact binomial test
#data:  B and n
#number of successes = 21, number of trials = 25, p-value = 0.0004553
#alternative hypothesis: true probability of success is greater than 0.5
#97.84 percent confidence interval:
# 0.6329661 1.0000000
#sample estimates:
#probability of success
#                  0.84
# Since the p-value is very small, we reject Ho: theta=0 in favor of H1: theta>0.
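## A small check (not in the book's code): the exact p-value of the sign test above is just the upper
## tail of the Binomial(n, 1/2) distribution, so it can also be obtained directly from pbinom.
pbinom(B - 1, n, 0.5, lower.tail = FALSE)   # P(B >= 21) under Ho
# this should reproduce the p-value 0.0004553 reported by binom.test above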
# Note that in place of "greater" one can specify either of "two.sided" and "less".

############# Section 3.5 --- Estimator ---
thetaHat<-median(Z)
thetaHat

############## Section 3.6 --- Confidence interval ---
OrderedZ<-sort(as.vector(Z))
conf.level <- 0.8922
n <- length(Z)
alpha <- 1 - conf.level
b <- qbinom(alpha / 2, n, 1/2, lower.tail = FALSE)
c(OrderedZ[n+1-b], OrderedZ[b])   # Confidence interval. The difference from the book is due to symmetry.

######################## Large sample approximation
Bstar <- (B - (n/2))/(sqrt(n/4))
Bstar
#[1] 3.4
# The approximate p-value is
1-pnorm(Bstar)
#[1] 0.000337
### With the continuity correction, the large sample approximation changes little; the p-value is still very small:
Bstar <- (B - 0.5 - (n/2))/(sqrt(n/4))
Bstar
#[1] 3.2
1-pnorm(Bstar)
#[1] 0.000687

### Note: In the case of ties we can use the same procedure.
### One usually deals with zero differences in the sign test as follows:
### after defining the vector Z of differences, use the commands
Z <- Z[Z != 0]
n <- length(Z)
### This treats zero differences as if they were not part of the data. If the hypothesized value mu
### is not zero, then replace 0 by mu above, i.e.
Z <- Z[Z != mu]
n <- length(Z)

############################################
### Chapter 3, problems with ties.
### Let's study the problem of Table 3.4.
### First we will use the wilcox.test command and we will see that we get the wrong answer.
### The reason is that the z vector has zero values!!!! Never use wilcox.test or wilcox.exact
### to calculate estimators or confidence intervals in the presence of ties!!!
x<-c(349,400,520,490,574,427,435)
y<-c(425,533,362,628,463,427,449)
z<-y-x
### wilcox.test gives us
wilcox.test(z, alternative=c("two.sided"), mu=0, conf.int=TRUE, exact=FALSE, correct=TRUE, conf.level=1-0.078)
#        Wilcoxon signed rank test with continuity correction
#data:  z
#V = 12, p-value = 0.834
#alternative hypothesis: true location is not equal to 0
#92.2 percent confidence interval:
# -134.5000  133.0000
#sample estimates:
#(pseudo)median
#      12.70899
## The median and the confidence interval are wrong!! This is because the z vector has zero values. Indeed
z
#[1]  76 133 -158 138 -111   0  14
### Let's see how we can program it ourselves.
z <- sort(y - x)
walsh <- outer(z, z, "+") / 2
print(walsh <- sort(walsh[lower.tri(walsh, diag = TRUE)]))
# [1] -158.0 -134.5 -111.0  -79.0  -72.0  -55.5  -48.5  -41.0  -17.5  -12.5  -10.0    0.0    7.0   11.0   13.5   14.0   38.0   45.0   66.5   69.0   73.5
#[22]   76.0   76.0  104.5  107.0  133.0  135.5  138.0
median(walsh)
#[1] 12.25
## This is the correct estimator.
## For the confidence interval one has to experiment a bit in order to achieve an exact 0.922 confidence level. The code is as follows:
conf.level <- 0.918
m <- length(walsh)
m
#[1] 28
n<-length(z)
alpha <- 1 - conf.level
k <- qsignrank(alpha / 2, n)
if (k == 0) k <- k + 1
print(k)
#[1] 4
cat("achieved confidence level:", 1 - 2 * psignrank(k - 1, n), "\n")
#achieved confidence level: 0.921875
c(walsh[k], walsh[m + 1 - k])
#[1] -79 107
#### This is the correct answer for the two-sided confidence interval.
## Of course, all this was programmed using the powerful vectorized commands of R. One could be more primitive (!!!) and use for loops. For example,
## the estimator could also be computed using the following code:
W<-NULL
for (i in 1:7) {
  for (j in i:7) {
    W<-c(W, (z[i]+z[j])/2)
  }
}
walsh.alt<-median(W)
walsh.alt
#[1] 12.25
#### the same answer, of course!!!!!
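## For convenience (a sketch, not from the book): the Walsh-average commands above can be wrapped
## into a small function returning the Hodges-Lehmann estimator and the signed rank confidence
## interval. The function name "hl.one.sample" is ours, not from Hollander and Wolfe.
hl.one.sample <- function(z, conf.level = 0.95) {
  walsh <- outer(z, z, "+") / 2
  walsh <- sort(walsh[lower.tri(walsh, diag = TRUE)])   # all n(n+1)/2 Walsh averages
  n <- length(z)
  m <- length(walsh)
  k <- qsignrank((1 - conf.level) / 2, n)
  if (k == 0) k <- 1
  list(estimate = median(walsh),
       conf.int = c(walsh[k], walsh[m + 1 - k]),
       achieved.conf.level = 1 - 2 * psignrank(k - 1, n))
}
hl.one.sample(y - x, conf.level = 0.918)   # should reproduce 12.25 and (-79, 107) from above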
######################################################################################################################
######################################################################################################################
# Chapter 4: The Two-Sample Location Problem
### Mann-Whitney/Wilcoxon test of location. Data are from Table 4.1.
x <- c(0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
y <- c(1.15, 0.88, 0.90, 0.74, 1.21)

#### Section 4.1 ###########################################################
### A nice way to handle this nonparametric procedure is as follows:
mu <- 0                  # hypothesized value of the location shift
m <- length(x)           # the length of the vector x
n <- length(y)           # the length of the vector y
y <- y - mu              # subtracting the hypothesized shift
data <- c(x, y)          # combining both vectors into one
names(data) <- c(rep("x", m), rep("y", n))
names(data)
# [1] "x" "x" "x" "x" "x" "x" "x" "x" "x" "x" "y" "y" "y" "y" "y"
data <- sort(data)       # sorting the data from smallest to largest
r <- rank(data)          # assigning ranks
rbind(data, r)           # combining them into one matrix
#        x    y   x    x    y   y    x    y    y     x     x     x     x     x     x
#data 0.73 0.74 0.8 0.83 0.88 0.9 1.04 1.15 1.21  1.38  1.45  1.46  1.64  1.89  1.91
#r    1.00 2.00 3.0 4.00 5.00 6.0 7.00 8.00 9.00 10.00 11.00 12.00 13.00 14.00 15.00
w <- sum(r[names(data) == "y"])   # Wilcoxon form, equation (4.3) in Hollander and Wolfe
w
# [1] 30
u <- w - n * (n + 1) / 2          # Mann-Whitney form, equation (4.15) in Hollander and Wolfe
u
# [1] 15
pwilcox(u, m, n)                  # the p-value of the (lower-tail) Wilcoxon test
# [1] 0.1272061
### Remark: For a one-sided upper-tail test the last line would be replaced by 1 - pwilcox(u - 1, m, n).
### Remark: For a two-sided test do both the lower-tail test and the upper-tail test and
### double the p-value of the smaller of the two results (see the check after Section 4.3 below).
### This is because two tails give twice one tail, by the symmetry of the null distribution of the test statistic.

#### Section 4.2 #################################################################
### The Hodges-Lehmann estimator associated with the rank sum test can be found as follows:
differences <- sort(as.vector(outer(y, x, "-")))
DeltaHat<-median(differences)
DeltaHat
#[1] -0.305
### The command outer(y, x, "-") needs some explanation. It is easier to understand via an example.
### Basically, it gives us all the possible differences in one line instead of using for loops!
x
# [1] 0.80 0.83 1.89 1.04 1.45 1.38 1.91 1.64 0.73 1.46
y
# [1] 1.15 0.88 0.90 0.74 1.21
outer(y, x, "-")
#      [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8] [,9] [,10]
#[1,]  0.35  0.32 -0.74  0.11 -0.30 -0.23 -0.76 -0.49 0.42 -0.31
#[2,]  0.08  0.05 -1.01 -0.16 -0.57 -0.50 -1.03 -0.76 0.15 -0.58
#[3,]  0.10  0.07 -0.99 -0.14 -0.55 -0.48 -1.01 -0.74 0.17 -0.56
#[4,] -0.06 -0.09 -1.15 -0.30 -0.71 -0.64 -1.17 -0.90 0.01 -0.72
#[5,]  0.41  0.38 -0.68  0.17 -0.24 -0.17 -0.70 -0.43 0.48 -0.25

#### Section 4.3 ###################################################################################
### The associated confidence interval (Moses) can be found as follows:
conf.level <- 0.95
Ldiff <- length(differences)
alpha <- 1 - conf.level
w <- qwilcox(alpha / 2, m, n)
if (w == 0) w <- w + 1
print(w)
cat("achieved confidence level:", 1 - 2 * pwilcox(w - 1, m, n), "\n")
#achieved confidence level: 0.96004
c(differences[w], differences[Ldiff + 1 - w])   # Confidence interval. The difference from the book is due to symmetry.
#[1] -0.76  0.15
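## A quick check of the two-sided remark from Section 4.1 (a sketch, not in the book's code):
## double the smaller of the two one-sided p-values of the Mann-Whitney statistic.
p.lower <- pwilcox(u, m, n)          # lower-tail p-value
p.upper <- 1 - pwilcox(u - 1, m, n)  # upper-tail p-value
2 * min(p.lower, p.upper)
# this should reproduce the two-sided p-value 0.2544 reported by wilcox.test below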
###################### Using wilcox.test to do the same thing ###############################################
wilcox.test(y, x, alternative = "two.sided", exact = TRUE, correct = TRUE, conf.int=TRUE, conf.level=1-0.04)
#        Wilcoxon rank sum test
#data:  y and x
#W = 15, p-value = 0.2544
#alternative hypothesis: true location shift is not equal to 0
#96 percent confidence interval:
# -0.76  0.15
#sample estimates:
#difference in location
#                -0.305
### R defines W as the sum of the first sample's ranks minus its minimum possible value m(m+1)/2,
### i.e. the Mann-Whitney form of the statistic:
m <- length(x)
n <- length(y)
XY <- c(x, y)
XY
# [1] 0.80 0.83 1.89 1.04 1.45 1.38 1.91 1.64 0.73 1.46 1.15 0.88 0.90 0.74 1.21
R <- rank(XY)
R
# [1]  3  4 14  7 11 10 15 13  1 12  8  5  6  2  9
W <- sum(R[(m+1):(m+n)])
W
#[1] 30
W1 <- sum(R[1:m])
W1
#[1] 90
W1 - sum(1:m)
#[1] 35

######################## Large sample approximation
wilcox.test(x, y, alternative = "less", exact = FALSE, correct = FALSE)   # H&W large sample approximation
#        Wilcoxon rank sum test
#data:  x and y
#W = 35, p-value = 0.8897
#alternative hypothesis: true location shift is less than 0
wilcox.test(rnorm(10), rnorm(10, 2), conf.int = TRUE)   # your numbers will differ: rnorm generates random data
#        Wilcoxon rank sum test
#data:  rnorm(10) and rnorm(10, 2)
#W = 4, p-value = 0.0001299
#alternative hypothesis: true location shift is not equal to 0
#95 percent confidence interval:
# -3.484645 -1.719936
#sample estimates:
#difference in location
#             -2.669803

################# Section 4.4 ##################################################
## Programming the FlignerPolicello function is one of your homework problems.
## Once you have written it, it should produce the following for these data:
X<-c(297,340,325,227,277,337,250,290)
Y<-c(293,291,289,430,510,353,318)
FlignerPolicello(X,Y)
#$P
#[1] 3 4 4 0 0 4 0 1
#$Q
#[1] 4 4 3 8 8 8 5
#$Pbar
#[1] 2
#$Qbar
#[1] 5.714286
#$V1
#[1] 26
#$V2
#[1] 29.42857
#$U
#[1] 1.467599
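## A by-hand check of the large sample approximation above (a sketch, not in the book's code):
## standardize the Mann-Whitney form of the statistic for the x sample (W1 - m(m+1)/2 = 35 above).
U.x <- W1 - m*(m+1)/2
(U.x - m*n/2) / sqrt(m*n*(m+n+1)/12)
pnorm((U.x - m*n/2) / sqrt(m*n*(m+n+1)/12))   # one-sided p-value for alternative = "less"
# this should reproduce the p-value 0.8897 reported by wilcox.test(x, y, alternative = "less", correct = FALSE) above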
######################################################################################################################
######################################################################################################################
# Chapter 5: The Two-Sample Dispersion Problem and other two-sample problems

############################## Ansari-Bradley test of dispersion, Section 5.1
### Two methods of measuring iron concentration in blood serum.
### All measurements are from a control serum with true value 105.
Ramsay <- c(111, 107, 100, 99, 102, 106, 109, 108, 104, 99, 101, 96, 97, 102, 107, 113, 116, 113, 110, 98)
Jung.Parekh <- c(107, 108, 106, 98, 105, 103, 110, 105, 104, 100, 96, 108, 103, 104, 114, 114, 113, 108, 106, 99)
median(Ramsay)
#[1] 105
median(Jung.Parekh)
#[1] 105.5
stripchart(list(Ramsay, Jung.Parekh), method="j", group.names=c("Ramsay", "Jung-Parekh"))
wilcox.test(Ramsay, Jung.Parekh)
#        Wilcoxon rank sum test with continuity correction
#data:  Ramsay and Jung.Parekh
#W = 185.5, p-value = 0.7044
#alternative hypothesis: true location shift is not equal to 0
#Warning message:
#In wilcox.test.default(Ramsay, Jung.Parekh) :
#  cannot compute exact p-value with ties

### Computing the Ansari-Bradley statistic by hand:
m <- length(Ramsay)
N <- length(Ramsay) + length(Jung.Parekh)
i <- c(Ramsay, Jung.Parekh)
R <- rank(i)
i
# [1] 111 107 100  99 102 106 109 108 104  99 101  96  97 102 107 113 116 113 110
#[20]  98 107 108 106  98 105 103 110 105 104 100  96 108 103 104 114 114 113 108
#[39] 106  99
R
# [1] 34.0 25.0  9.5  7.0 12.5 22.0 31.0 28.5 17.0  7.0 11.0  1.5  3.0 12.5 25.0
#[16] 36.0 40.0 36.0 32.5  4.5 25.0 28.5 22.0  4.5 19.5 14.5 32.5 19.5 17.0  9.5
#[31]  1.5 28.5 14.5 17.0 38.5 38.5 36.0 28.5 22.0  7.0
R[R > N/2] <- (N+1) - R[R > N/2]   # fold the ranks to obtain the Ansari-Bradley scores (N is even here)
R
# [1]  7.0 16.0  9.5  7.0 12.5 19.0 10.0 12.5 17.0  7.0 11.0  1.5  3.0 12.5 16.0
#[16]  5.0  1.0  5.0  8.5  4.5 16.0 12.5 19.0  4.5 19.5 14.5  8.5 19.5 17.0  9.5
#[31]  1.5 12.5 14.5 17.0  2.5  2.5  5.0 12.5 19.0  7.0
sum(R[1:m])
#[1] 185.5
ansari.test(Ramsay, Jung.Parekh)
#        Ansari-Bradley test
#data:  Ramsay and Jung.Parekh
#AB = 185.5, p-value = 0.1815
#alternative hypothesis: true ratio of scales is not equal to 1
#Warning message:
#In ansari.test.default(Ramsay, Jung.Parekh) :
#  cannot compute exact p-value with ties
ansari.test(Ramsay, Jung.Parekh, "less")   # one-sided: check whether Jung.Parekh has greater dispersion
#        Ansari-Bradley test
#data:  Ramsay and Jung.Parekh
#AB = 185.5, p-value = 0.9093
#alternative hypothesis: true ratio of scales is less than 1
#Warning message:
#In ansari.test.default(Ramsay, Jung.Parekh, "less") :
#  cannot compute exact p-value with ties

## Warning: above we had length(X) = length(Y).
## If the samples are not of equal length, then label Y the one with fewer observations.
## The code is then as follows. This is Problem 5.1, and here N is odd, so we need to fold at N/2+1.
x <- c(0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
y <- c(1.15, 0.88, 0.90, 0.74, 1.21)
xy<-c(x,y)
m<-length(x)
n<-length(y)
R <- rank(xy)
N <- m + n
R
# [1]  3  4 14  7 11 10 15 13  1 12  8  5  6  2  9
R[R > N/2+1] <- (N+1) - R[R > N/2+1]
R
# [1] 3 4 2 7 5 6 1 3 1 4 8 5 6 2 7
sum(R[(m+1):(m+n)])
#[1] 28

##################################### Miller test for dispersion based on the jackknife, Section 5.2
## Programming the MillerTest_5.2 function is one of your homework problems.
### Data are from Table 5.2.
X<-c(6.2,5.9,8.9,6.5,8.6)
Y<-c(9.5,9.8,9.5,9.6,10.3)
MillerTest_5.2(X,Y)   # once you have written the function, it should return:
#$Abar
#[1] 0.7522871
#$Bbar
#[1] -1.385421
#$V_A
#[1] 0.1139820
#$V_B
#[1] 2.366502
#$Q
#[1] 1.357314

######### Test for either location or dispersion, Section 5.3
### Programming the Lepage test statistic is one of your homework problems.

########################################### Kolmogorov-Smirnov test
### WARNING 1: R calculates the quantity D = max_i |F_m(Z_i) - G_n(Z_i)|,
### namely it does not take into account the prefactor mn/d. So you should be
### careful when converting things to the results of the book. The p-value that
### R gives is correct for this particular formulation.
### WARNING 2: in the two-sample case, alternative="greater" includes distributions
### for which x is stochastically smaller than y (the CDF of x lies above, and hence
### to the left of, that of y), in contrast to wilcox.test.
### If you want wilcox.test(x, y, alternative = "less"), then its counterpart is
### ks.test(x, y, alternative = "greater"). So, if you know what is going on, you will be fine.
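## A small illustration of WARNING 1 (a sketch, not in the book's code): R's two-sample D can be
## reproduced by evaluating the two empirical CDFs on the pooled sample; x and y below are the
## Table 4.1 samples defined above.
pooled <- c(x, y)
max(abs(ecdf(x)(pooled) - ecdf(y)(pooled)))
# this is the D that ks.test(x, y) would report; multiplying by mn/d (d = greatest common divisor of m and n)
# gives the statistic in the book's formulation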
Ramsay <- c(111, 107, 100, 99, 102, 106, 109, 108, 104, 99, 101, 96, 97, 102, 107, 113, 116, 113, 110, 98)
Jung.Parekh <- c(107, 108, 106, 98, 105, 103, 110, 105, 104, 100, 96, 108, 103, 104, 114, 114, 113, 108, 106, 99)
plot(ecdf(Ramsay), do.points=FALSE, verticals=TRUE, xlim = c(90, 120))
lines(ecdf(Jung.Parekh), do.points=FALSE, verticals=TRUE, col.h="red", col.v="red")
ks.test(Ramsay, Jung.Parekh)
#        Two-sample Kolmogorov-Smirnov test
#data:  Ramsay and Jung.Parekh
#D = 0.25, p-value = 0.5596
#alternative hypothesis: two-sided
#Warning message:
#In ks.test(Ramsay, Jung.Parekh) : cannot compute correct p-values with ties
help("ks.test")
ks.test(Ramsay, Jung.Parekh, exact=TRUE)
#        Two-sample Kolmogorov-Smirnov test
#data:  Ramsay and Jung.Parekh
#D = 0.25, p-value = 0.5596
#alternative hypothesis: two-sided
#Warning message:
#In ks.test(Ramsay, Jung.Parekh, exact = TRUE) :
#  cannot compute correct p-values with ties
ks.test(Ramsay, Jung.Parekh, alternative = c("less"), exact=TRUE)
#        Two-sample Kolmogorov-Smirnov test
#data:  Ramsay and Jung.Parekh
#D^- = 0.1, p-value = 0.8187
#alternative hypothesis: the CDF of x lies below that of y
#Warning message:
#In ks.test(Ramsay, Jung.Parekh, alternative = c("less"), exact = TRUE) :
#  cannot compute correct p-values with ties
ks.test(Ramsay, Jung.Parekh, alternative = c("greater"), exact=TRUE)
#        Two-sample Kolmogorov-Smirnov test
#data:  Ramsay and Jung.Parekh
#D^+ = 0.25, p-value = 0.2865
#alternative hypothesis: the CDF of x lies above that of y
#Warning message:
#In ks.test(Ramsay, Jung.Parekh, alternative = c("greater"), exact = TRUE) :
#  cannot compute correct p-values with ties

######################################################################################################################
######################################################################################################################
####### Chapter 6: The One-Way Layout
### Kruskal-Wallis test for general alternatives. Data are from Table 6.1.
normal <- c(2.9, 3.0, 2.5, 2.6, 3.2)       # normal subjects
oadisease <- c(3.8, 2.7, 4.0, 2.4)         # subjects with obstructive airway disease
asbestosis <- c(2.8, 3.4, 3.7, 2.2, 2.0)   # subjects with asbestosis
kruskal.test(list(normal, oadisease, asbestosis))
#        Kruskal-Wallis rank sum test
#data:  list(normal, oadisease, asbestosis)
#Kruskal-Wallis chi-squared = 0.7714, df = 2, p-value = 0.68
## It works fine with ties!!!!!! Good news!!!!
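## To make the statistic concrete (a sketch, not in the book's code): the Kruskal-Wallis statistic
## H = 12/(N(N+1)) * sum_j R_j^2/n_j - 3(N+1) computed by hand (there are no ties in this example).
groups <- list(normal, oadisease, asbestosis)
nj <- lengths(groups)                                # group sizes
N <- sum(nj)
rk <- rank(unlist(groups))                           # joint ranking of all N observations
Rj <- tapply(rk, rep(seq_along(groups), nj), sum)    # rank sum of each group
H <- 12/(N*(N+1)) * sum(Rj^2 / nj) - 3*(N+1)
H
1 - pchisq(H, df = length(groups) - 1)
# these should reproduce the chi-squared 0.7714 and p-value 0.68 reported by kruskal.test above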
######################################################################################################################
######################################################################################################################
## Chapter 7: The Two-Way Layout
### Friedman test for general alternatives in a randomized complete block design.
### This is Section 7.1; data are from Table 7.1.
### Comparison of three methods ("round out", "narrow angle", and "wide angle") for rounding
### first base. For each of 22 players and each of the three methods, the average time of two runs
### from a point on the first base line 35ft from home plate to a point 15ft short of second base is recorded.
RoundingTimes <- matrix(c(5.40, 5.50, 5.55,
                          5.85, 5.70, 5.75,
                          5.20, 5.60, 5.50,
                          5.55, 5.50, 5.40,
                          5.90, 5.85, 5.70,
                          5.45, 5.55, 5.60,
                          5.40, 5.40, 5.35,
                          5.45, 5.50, 5.35,
                          5.25, 5.15, 5.00,
                          5.85, 5.80, 5.70,
                          5.25, 5.20, 5.10,
                          5.65, 5.55, 5.45,
                          5.60, 5.35, 5.45,
                          5.05, 5.00, 4.95,
                          5.50, 5.50, 5.40,
                          5.45, 5.55, 5.50,
                          5.55, 5.55, 5.35,
                          5.45, 5.50, 5.55,
                          5.50, 5.45, 5.25,
                          5.65, 5.60, 5.40,
                          5.70, 5.65, 5.55,
                          6.30, 6.30, 6.25),
                        nrow = 22, byrow = TRUE,
                        dimnames = list(1 : 22, c("Round Out", "Narrow Angle", "Wide Angle")))
friedman.test(RoundingTimes)
#        Friedman rank sum test
#data:  RoundingTimes
#Friedman chi-squared = 11.1429, df = 2, p-value = 0.003805
## Strong evidence against the null hypothesis that the methods are equivalent with respect to speed.

######################################################################################################################
######################################################################################################################
# Chapter 8: The Independence Problem

##### Kendall test for independence based on signs. Data are from Table 8.1. This is Sections 8.1 and 8.2.
Hunter <- c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)
Panel <- c( 2.6, 3.1, 2.5, 5.0, 3.6, 4.0, 5.2, 2.8, 3.8)
plot(Hunter, Panel)
cor(Hunter, Panel, method="kendall")   ## Kendall point estimate, Section 8.2
#[1] 0.4444444
pos <- 0
neg <- 0
n<-length(Hunter)
for (i in 1:(n-1))
  for (j in (i+1):n) {
    if ((Hunter[j]-Hunter[i])*(Panel[j]-Panel[i]) > 0) pos <- pos+1 else neg <- neg+1
  }
pos
#[1] 26
neg
#[1] 10
2*(pos-neg)/(n*(n-1))
#[1] 0.4444444
cor.test(Hunter, Panel, method="kendall", alt="greater")   ## Kendall test for independence based on signs
#        Kendall's rank correlation tau
#data:  Hunter and Panel
#T = 26, p-value = 0.05972
#alternative hypothesis: true tau is greater than 0
#sample estimates:
#      tau
#0.4444444

######################## Large sample approximation
cor.test(Hunter, Panel, method = "kendall", alternative = "greater", exact=FALSE)
#        Kendall's rank correlation tau
#data:  Hunter and Panel
#z = 1.6681, p-value = 0.04765
#alternative hypothesis: true tau is greater than 0
#sample estimates:
#      tau
#0.4444444

##### Distribution-free confidence interval based on the Kendall statistic, Section 8.3 #######################
### Programming it is a HW problem.

################### Remark: To get the results of the book you need to load the package Kendall.
################### In particular we get
library(Kendall)
summary(Kendall(Hunter,Panel))
#Score =  16 , Var(Score) = 92
#denominator =  36
#tau = 0.444, 2-sided pvalue = 0.11785
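## A by-hand version of the large sample approximation above (a sketch, not in the book's code):
## standardize the Kendall score K = pos - neg by its null standard deviation sqrt(n(n-1)(2n+5)/18).
K <- pos - neg                 # this is the Score = 16 reported by the Kendall package
VarK <- n*(n-1)*(2*n+5)/18     # null variance of K; equals 92 for n = 9
K / sqrt(VarK)
1 - pnorm(K / sqrt(VarK))
# these should reproduce the z = 1.6681 and p-value 0.04765 reported by cor.test(..., exact = FALSE) above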
##########################################################
## Spearman test for independence based on ranks. Data are from Table 8.1. This is Section 8.5.
Hunter <- c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)
Panel <- c( 2.6, 3.1, 2.5, 5.0, 3.6, 4.0, 5.2, 2.8, 3.8)
plot(Hunter, Panel)
cor(Hunter, Panel)                     # Pearson's r
#[1] 0.5711816
cor(Hunter, Panel, method="spearman")  # Spearman's rho
#[1] 0.6
H<-rank(Hunter)
P<-rank(Panel)
cbind(H, P)
#      H P
# [1,] 3 2
# [2,] 6 4
# [3,] 1 1
# [4,] 8 8
# [5,] 4 5
# [6,] 2 7
# [7,] 7 9
# [8,] 5 3
# [9,] 9 6
plot(H, P)
cor(H, P)
#[1] 0.6
S <- sum((H - P)^2)
S
#[1] 48
n <- length(Hunter)
1 - 6*S/(n*(n^2-1))                    # Spearman's rho computed from S
#[1] 0.6
cor.test(Hunter, Panel, method="spearman", alt="greater")
#        Spearman's rank correlation rho
#data:  Hunter and Panel
#S = 48, p-value = 0.0484
#alternative hypothesis: true rho is greater than 0
#sample estimates:
#rho
#0.6

########################################################################################################################
########################################################################################################################
# Chapter 9: Regression Problems

### Theil test for the slope of the regression line. Data are from Table 9.1. This is Sections 9.1-9.3.
### We test beta = b.
x <- c(1,2,3,4,5)
Y <- c(1.26, 1.27, 1.12, 1.16, 1.03)
b<-0
D<-Y-b*x
n<-length(x)
cor(x, D, method="kendall")
# [1] -0.6
pos <- 0
neg <- 0
for (i in 1:(n-1))
  for (j in (i+1):n) {
    if ((x[j]-x[i])*(D[j]-D[i]) > 0) pos <- pos+1 else neg <- neg+1
  }
pos
#[1] 2
neg
#[1] 8
2*(pos-neg)/(n*(n-1))
# [1] -0.6

##################### Remark: To get the results of the book you need to load the package Kendall. In particular we get
library(Kendall)
summary(Kendall(x,D))
#Score =  -6 , Var(Score) = 16.66667
#denominator =  10
#tau = -0.6, 2-sided pvalue = 0.22067

### A slope estimator associated with the Theil statistic, Section 9.2.
### Programming it is a HW problem. Once written, the estimator should be
BetaHat
# [1] -0.05625

##################### Large sample approximation
cor.test(x, D, method = "kendall", alternative = "greater", exact=FALSE)
#        Kendall's rank correlation tau
#data:  x and D
#z = -1.4697, p-value = 0.9292
#alternative hypothesis: true tau is greater than 0
#sample estimates:
#   tau
#  -0.6
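## The same standardization as in the Chapter 8 check can be repeated here (a sketch, not in the book's code):
## with pos = 2 and neg = 8 the score is -6, and its null variance is n(n-1)(2n+5)/18 = 16.66667 for n = 5.
(pos - neg) / sqrt(n*(n-1)*(2*n+5)/18)
# this should reproduce the z = -1.4697 reported by cor.test above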