### R examples for nonparametric statistics
### Source: Hollander M, Wolfe D, Nonparametric Statistical Methods, 2nd edition, Wiley
### Course: APMA168 - Nonparametric Statistics, Brown University
### Instructor: Konstantinos Spiliopoulos
## The output from R is shown in comments, so that you won't get errors after copy-pasting.
#####################################################################################################################
#####################################################################################################################
# Chapter 2: Binomial probabilities and tests

######################### An example of a binomial probability
2*(1-pbinom(6, 10, .5))
#[1] 0.34375
2*(pbinom(3, 10, .5))
#[1] 0.34375

######################### A binomial test
binom.test(7, 10, .5)
#        Exact binomial test
#data:  7 and 10
#number of successes = 7, number of trials = 10, p-value = 0.3438
#alternative hypothesis: true probability of success is not equal to 0.5
#95 percent confidence interval:
# 0.3475471 0.9332605
#sample estimates:
#probability of success
#                   0.7

### If you need help with any command, for example with "pbinom", you can use
help("pbinom")

######################################################################################################################
######################################################################################################################
# Chapter 3: The One-Sample Location Problem, sign tests, Wilcoxon test

### Paired replicates analysis by way of signed ranks, Sections 3.1-3.3: the Wilcoxon signed rank test.
### Data are from Table 3.1.
X<-c(1.83, 0.5, 1.62, 2.48, 1.68, 1.88, 1.55, 3.06, 1.3)
Y<-c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
Z<-Y-X
help("wilcox.test")
wilcox.test(Z, alternative = c("two.sided"), mu=0, conf.int=TRUE, exact=TRUE, conf.level = 1-0.049)
#        Wilcoxon signed rank test
#data:  Z
#V = 5, p-value = 0.03906
#alternative hypothesis: true location is not equal to 0
#95.1 percent confidence interval:
# -0.786 -0.010
#sample estimates:
#(pseudo)median
#         -0.46

################ Large sample approximation
n<-length(Z)
R <- rank(abs(Z))
R
#[1] 8 3 9 4 7 6 5 2 1
T <- sum(R[Z > 0])
T
#[1] 5
T1 <- (T - (n*(n+1)/4))/(sqrt(n*(n+1)*(2*n+1)/24))
pnorm(T1)    # one-sided test
#[1] 0.0192
## smallest significance level at which we can reject Ho in favor of the one-sided H1
2*pnorm(T1)  # two-sided test
#[1] 0.0382
## smallest significance level at which we can reject Ho in favor of the two-sided H1

################# Paired replicates analysis by way of signs, Sections 3.4-3.7
######### --- Data are from Table 3.5 ---
XDark<-c(5.8, 13.5, 26.1, 7.4, 7.6, 23, 10.7, 9.1, 19.3, 26.3, 17.5, 17.9, 18.3, 14.2, 55.2, 15.4, 30, 21.3, 26.8, 8.1, 24.3, 21.3, 18.2, 22.5, 31.1)
Yillumination<-c(5,21,73,25,3,77,59,13,36,46,9,25,59,38,70,36,55,46,25,30,29,46,71,31,33)
Z<-Yillumination-XDark
Z
# [1] -0.8  7.5 46.9 17.6 -4.6 54.0 48.3  3.9 16.7 19.7 -8.5  7.1 40.7 23.8 14.8 20.6 25.0 24.7 -1.8 21.9  4.7 24.7 52.8  8.5  1.9
n<-length(Z)
B <- length(Z[Z>0.0])
B
# [1] 21
binom.test(B, n, 0.5, alternative=c("greater"), conf.level=1-0.0216)
#        Exact binomial test
#data:  B and n
#number of successes = 21, number of trials = 25, p-value = 0.0004553
#alternative hypothesis: true probability of success is greater than 0.5
#97.84 percent confidence interval:
# 0.6329661 1.0000000
#sample estimates:
#probability of success
#                  0.84
# Since the p-value is very small, we reject Ho: theta=0 in favor of H1: theta>0.
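## A small check (not in the book's code): the exact p-value of the sign test above is just the upper
## tail of the Binomial(n, 1/2) distribution, so it can also be obtained directly from pbinom.
pbinom(B - 1, n, 0.5, lower.tail = FALSE)   # P(B >= 21) under Ho
# this should reproduce the p-value 0.0004553 reported by binom.test above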
# Note that in place of "greater" one can specify either of "two.sided" and "less".

############# Section 3.5 --- Estimator ---
thetaHat<-median(Z)
thetaHat

############## Section 3.6 --- Confidence interval ---
OrderedZ<-sort(as.vector(Z))
conf.level <- 0.8922
n <- length(Z)
alpha <- 1 - conf.level
b <- qbinom(alpha / 2, n, 1/2, lower.tail = FALSE)
c(OrderedZ[n+1-b], OrderedZ[b])   # Confidence interval. The difference from the book is due to symmetry.

######################## Large sample approximation
Bstar <- (B - (n/2))/(sqrt(n/4))
Bstar
#[1] 3.4
# The approximate p-value is
1-pnorm(Bstar)
#[1] 0.000337
### With the continuity correction, the large sample approximation changes little; the p-value is still very small:
Bstar <- (B - 0.5 - (n/2))/(sqrt(n/4))
Bstar
#[1] 3.2
1-pnorm(Bstar)
#[1] 0.000687

### Note: In the case of ties we can use the same procedure.
### One usually deals with zero differences in the sign test as follows:
### after defining the vector Z of differences, use the commands
Z <- Z[Z != 0]
n <- length(Z)
### This treats zero differences as if they were not part of the data. If the hypothesized value mu
### is not zero, then replace 0 by mu above, i.e.
Z <- Z[Z != mu]
n <- length(Z)

############################################
### Chapter 3, problems with ties.
### Let's study the problem of Table 3.4.
### First we will use the wilcox.test command and we will see that we get the wrong answer.
### The reason is that the z vector has zero values!!!! Never use wilcox.test or wilcox.exact
### to calculate estimators or confidence intervals in the presence of ties!!!
x<-c(349,400,520,490,574,427,435)
y<-c(425,533,362,628,463,427,449)
z<-y-x
### wilcox.test gives us
wilcox.test(z, alternative=c("two.sided"), mu=0, conf.int=TRUE, exact=FALSE, correct=TRUE, conf.level=1-0.078)
#        Wilcoxon signed rank test with continuity correction
#data:  z
#V = 12, p-value = 0.834
#alternative hypothesis: true location is not equal to 0
#92.2 percent confidence interval:
# -134.5000  133.0000
#sample estimates:
#(pseudo)median
#      12.70899
## The median and the confidence interval are wrong!! This is because the z vector has zero values. Indeed
z
#[1]  76 133 -158 138 -111   0  14
### Let's see how we can program it ourselves.
z <- sort(y - x)
walsh <- outer(z, z, "+") / 2
print(walsh <- sort(walsh[lower.tri(walsh, diag = TRUE)]))
# [1] -158.0 -134.5 -111.0  -79.0  -72.0  -55.5  -48.5  -41.0  -17.5  -12.5  -10.0    0.0    7.0   11.0   13.5   14.0   38.0   45.0   66.5   69.0   73.5
#[22]   76.0   76.0  104.5  107.0  133.0  135.5  138.0
median(walsh)
#[1] 12.25
## This is the correct estimator.
## For the confidence interval one has to experiment a bit in order to achieve an exact 0.922 confidence level. The code is as follows:
conf.level <- 0.918
m <- length(walsh)
m
#[1] 28
n<-length(z)
alpha <- 1 - conf.level
k <- qsignrank(alpha / 2, n)
if (k == 0) k <- k + 1
print(k)
#[1] 4
cat("achieved confidence level:", 1 - 2 * psignrank(k - 1, n), "\n")
#achieved confidence level: 0.921875
c(walsh[k], walsh[m + 1 - k])
#[1] -79 107
#### This is the correct answer for the two-sided confidence interval.
## Of course, all this was programmed using the powerful vectorized commands of R. One could be more primitive (!!!) and use for loops. For example,
## the estimator could also be computed using the following code:
W<-NULL
for (i in 1:7) {
  for (j in i:7) {
    W<-c(W, (z[i]+z[j])/2)
  }
}
walsh.alt<-median(W)
walsh.alt
#[1] 12.25
#### the same answer, of course!!!!!
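## For convenience (a sketch, not from the book): the Walsh-average commands above can be wrapped
## into a small function returning the Hodges-Lehmann estimator and the signed rank confidence
## interval. The function name "hl.one.sample" is ours, not from Hollander and Wolfe.
hl.one.sample <- function(z, conf.level = 0.95) {
  walsh <- outer(z, z, "+") / 2
  walsh <- sort(walsh[lower.tri(walsh, diag = TRUE)])   # all n(n+1)/2 Walsh averages
  n <- length(z)
  m <- length(walsh)
  k <- qsignrank((1 - conf.level) / 2, n)
  if (k == 0) k <- 1
  list(estimate = median(walsh),
       conf.int = c(walsh[k], walsh[m + 1 - k]),
       achieved.conf.level = 1 - 2 * psignrank(k - 1, n))
}
hl.one.sample(y - x, conf.level = 0.918)   # should reproduce 12.25 and (-79, 107) from above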
######################################################################################################################
######################################################################################################################
# Chapter 4: The Two-Sample Location Problem
### Mann-Whitney/Wilcoxon test of location. Data are from Table 4.1.
x <- c(0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
y <- c(1.15, 0.88, 0.90, 0.74, 1.21)

#### Section 4.1 ###########################################################
### A nice way to handle this nonparametric procedure is as follows:
mu <- 0                  # hypothesized value of the location shift
m <- length(x)           # the length of the vector x
n <- length(y)           # the length of the vector y
y <- y - mu              # subtracting the hypothesized shift
data <- c(x, y)          # combining both vectors into one
names(data) <- c(rep("x", m), rep("y", n))
names(data)
# [1] "x" "x" "x" "x" "x" "x" "x" "x" "x" "x" "y" "y" "y" "y" "y"
data <- sort(data)       # sorting the data from smallest to largest
r <- rank(data)          # assigning ranks
rbind(data, r)           # combining them into one matrix
#        x    y   x    x    y   y    x    y    y     x     x     x     x     x     x
#data 0.73 0.74 0.8 0.83 0.88 0.9 1.04 1.15 1.21  1.38  1.45  1.46  1.64  1.89  1.91
#r    1.00 2.00 3.0 4.00 5.00 6.0 7.00 8.00 9.00 10.00 11.00 12.00 13.00 14.00 15.00
w <- sum(r[names(data) == "y"])   # Wilcoxon form, equation (4.3) in Hollander and Wolfe
w
# [1] 30
u <- w - n * (n + 1) / 2          # Mann-Whitney form, equation (4.15) in Hollander and Wolfe
u
# [1] 15
pwilcox(u, m, n)                  # the p-value of the (lower-tail) Wilcoxon test
# [1] 0.1272061
### Remark: For a one-sided upper-tail test the last line would be replaced by 1 - pwilcox(u - 1, m, n).
### Remark: For a two-sided test do both the lower-tail test and the upper-tail test and
### double the p-value of the smaller of the two results (see the check after Section 4.3 below).
### This is because two tails give twice one tail, by the symmetry of the null distribution of the test statistic.

#### Section 4.2 #################################################################
### The Hodges-Lehmann estimator associated with the rank sum test can be found as follows:
differences <- sort(as.vector(outer(y, x, "-")))
DeltaHat<-median(differences)
DeltaHat
#[1] -0.305
### The command outer(y, x, "-") needs some explanation. It is easier to understand via an example.
### Basically, it gives us all the possible differences in one line instead of using for loops!
x
# [1] 0.80 0.83 1.89 1.04 1.45 1.38 1.91 1.64 0.73 1.46
y
# [1] 1.15 0.88 0.90 0.74 1.21
outer(y, x, "-")
#      [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8] [,9] [,10]
#[1,]  0.35  0.32 -0.74  0.11 -0.30 -0.23 -0.76 -0.49 0.42 -0.31
#[2,]  0.08  0.05 -1.01 -0.16 -0.57 -0.50 -1.03 -0.76 0.15 -0.58
#[3,]  0.10  0.07 -0.99 -0.14 -0.55 -0.48 -1.01 -0.74 0.17 -0.56
#[4,] -0.06 -0.09 -1.15 -0.30 -0.71 -0.64 -1.17 -0.90 0.01 -0.72
#[5,]  0.41  0.38 -0.68  0.17 -0.24 -0.17 -0.70 -0.43 0.48 -0.25

#### Section 4.3 ###################################################################################
### The associated confidence interval (Moses) can be found as follows:
conf.level <- 0.95
Ldiff <- length(differences)
alpha <- 1 - conf.level
w <- qwilcox(alpha / 2, m, n)
if (w == 0) w <- w + 1
print(w)
cat("achieved confidence level:", 1 - 2 * pwilcox(w - 1, m, n), "\n")
#achieved confidence level: 0.96004
c(differences[w], differences[Ldiff + 1 - w])   # Confidence interval. The difference from the book is due to symmetry.
#[1] -0.76  0.15
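## A quick check of the two-sided remark from Section 4.1 (a sketch, not in the book's code):
## double the smaller of the two one-sided p-values of the Mann-Whitney statistic.
p.lower <- pwilcox(u, m, n)          # lower-tail p-value
p.upper <- 1 - pwilcox(u - 1, m, n)  # upper-tail p-value
2 * min(p.lower, p.upper)
# this should reproduce the two-sided p-value 0.2544 reported by wilcox.test below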
###################### Using wilcox.test to do the same thing ###############################################
wilcox.test(y, x, alternative = "two.sided", exact = TRUE, correct = TRUE, conf.int=TRUE, conf.level=1-0.04)
#        Wilcoxon rank sum test
#data:  y and x
#W = 15, p-value = 0.2544
#alternative hypothesis: true location shift is not equal to 0
#96 percent confidence interval:
# -0.76  0.15
#sample estimates:
#difference in location
#                -0.305
### R defines W as the sum of the first sample's ranks minus its minimum possible value m(m+1)/2,
### i.e. the Mann-Whitney form of the statistic:
m <- length(x)
n <- length(y)
XY <- c(x, y)
XY
# [1] 0.80 0.83 1.89 1.04 1.45 1.38 1.91 1.64 0.73 1.46 1.15 0.88 0.90 0.74 1.21
R <- rank(XY)
R
# [1]  3  4 14  7 11 10 15 13  1 12  8  5  6  2  9
W <- sum(R[(m+1):(m+n)])
W
#[1] 30
W1 <- sum(R[1:m])
W1
#[1] 90
W1 - sum(1:m)
#[1] 35

######################## Large sample approximation
wilcox.test(x, y, alternative = "less", exact = FALSE, correct = FALSE)   # H&W large sample approximation
#        Wilcoxon rank sum test
#data:  x and y
#W = 35, p-value = 0.8897
#alternative hypothesis: true location shift is less than 0
wilcox.test(rnorm(10), rnorm(10, 2), conf.int = TRUE)   # your numbers will differ: rnorm generates random data
#        Wilcoxon rank sum test
#data:  rnorm(10) and rnorm(10, 2)
#W = 4, p-value = 0.0001299
#alternative hypothesis: true location shift is not equal to 0
#95 percent confidence interval:
# -3.484645 -1.719936
#sample estimates:
#difference in location
#             -2.669803

################# Section 4.4 ##################################################
## Programming the FlignerPolicello function is one of your homework problems.
## Once you have written it, it should produce the following for these data:
X<-c(297,340,325,227,277,337,250,290)
Y<-c(293,291,289,430,510,353,318)
FlignerPolicello(X,Y)
#$P
#[1] 3 4 4 0 0 4 0 1
#$Q
#[1] 4 4 3 8 8 8 5
#$Pbar
#[1] 2
#$Qbar
#[1] 5.714286
#$V1
#[1] 26
#$V2
#[1] 29.42857
#$U
#[1] 1.467599
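## A by-hand check of the large sample approximation above (a sketch, not in the book's code):
## standardize the Mann-Whitney form of the statistic for the x sample (W1 - m(m+1)/2 = 35 above).
U.x <- W1 - m*(m+1)/2
(U.x - m*n/2) / sqrt(m*n*(m+n+1)/12)
pnorm((U.x - m*n/2) / sqrt(m*n*(m+n+1)/12))   # one-sided p-value for alternative = "less"
# this should reproduce the p-value 0.8897 reported by wilcox.test(x, y, alternative = "less", correct = FALSE) above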
######################################################################################################################
######################################################################################################################
# Chapter 5: The Two-Sample Dispersion Problem and other two-sample problems

############################## Ansari-Bradley test of dispersion, Section 5.1
### Two methods of measuring iron concentration in blood serum.
### All measurements are from a control serum with true value 105.
Ramsay <- c(111, 107, 100, 99, 102, 106, 109, 108, 104, 99, 101, 96, 97, 102, 107, 113, 116, 113, 110, 98)
Jung.Parekh <- c(107, 108, 106, 98, 105, 103, 110, 105, 104, 100, 96, 108, 103, 104, 114, 114, 113, 108, 106, 99)
median(Ramsay)
#[1] 105
median(Jung.Parekh)
#[1] 105.5
stripchart(list(Ramsay, Jung.Parekh), method="j", group.names=c("Ramsay", "Jung-Parekh"))
wilcox.test(Ramsay, Jung.Parekh)
#        Wilcoxon rank sum test with continuity correction
#data:  Ramsay and Jung.Parekh
#W = 185.5, p-value = 0.7044
#alternative hypothesis: true location shift is not equal to 0
#Warning message:
#In wilcox.test.default(Ramsay, Jung.Parekh) :
#  cannot compute exact p-value with ties

### Computing the Ansari-Bradley statistic by hand:
m <- length(Ramsay)
N <- length(Ramsay) + length(Jung.Parekh)
i <- c(Ramsay, Jung.Parekh)
R <- rank(i)
i
# [1] 111 107 100  99 102 106 109 108 104  99 101  96  97 102 107 113 116 113 110
#[20]  98 107 108 106  98 105 103 110 105 104 100  96 108 103 104 114 114 113 108
#[39] 106  99
R
# [1] 34.0 25.0  9.5  7.0 12.5 22.0 31.0 28.5 17.0  7.0 11.0  1.5  3.0 12.5 25.0
#[16] 36.0 40.0 36.0 32.5  4.5 25.0 28.5 22.0  4.5 19.5 14.5 32.5 19.5 17.0  9.5
#[31]  1.5 28.5 14.5 17.0 38.5 38.5 36.0 28.5 22.0  7.0
R[R > N/2] <- (N+1) - R[R > N/2]   # fold the ranks to obtain the Ansari-Bradley scores (N is even here)
R
# [1]  7.0 16.0  9.5  7.0 12.5 19.0 10.0 12.5 17.0  7.0 11.0  1.5  3.0 12.5 16.0
#[16]  5.0  1.0  5.0  8.5  4.5 16.0 12.5 19.0  4.5 19.5 14.5  8.5 19.5 17.0  9.5
#[31]  1.5 12.5 14.5 17.0  2.5  2.5  5.0 12.5 19.0  7.0
sum(R[1:m])
#[1] 185.5
ansari.test(Ramsay, Jung.Parekh)
#        Ansari-Bradley test
#data:  Ramsay and Jung.Parekh
#AB = 185.5, p-value = 0.1815
#alternative hypothesis: true ratio of scales is not equal to 1
#Warning message:
#In ansari.test.default(Ramsay, Jung.Parekh) :
#  cannot compute exact p-value with ties
ansari.test(Ramsay, Jung.Parekh, "less")   # one-sided: check whether Jung.Parekh has greater dispersion
#        Ansari-Bradley test
#data:  Ramsay and Jung.Parekh
#AB = 185.5, p-value = 0.9093
#alternative hypothesis: true ratio of scales is less than 1
#Warning message:
#In ansari.test.default(Ramsay, Jung.Parekh, "less") :
#  cannot compute exact p-value with ties

## Warning: above we had length(X) = length(Y).
## If the samples are not of equal length, then label Y the one with fewer observations.
## The code is then as follows. This is Problem 5.1, and here N is odd, so we need to fold at N/2+1.
x <- c(0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
y <- c(1.15, 0.88, 0.90, 0.74, 1.21)
xy<-c(x,y)
m<-length(x)
n<-length(y)
R <- rank(xy)
N <- m + n
R
# [1]  3  4 14  7 11 10 15 13  1 12  8  5  6  2  9
R[R > N/2+1] <- (N+1) - R[R > N/2+1]
R
# [1] 3 4 2 7 5 6 1 3 1 4 8 5 6 2 7
sum(R[(m+1):(m+n)])
#[1] 28

##################################### Miller test for dispersion based on the jackknife, Section 5.2
## Programming the MillerTest_5.2 function is one of your homework problems.
### Data are from Table 5.2.
X<-c(6.2,5.9,8.9,6.5,8.6)
Y<-c(9.5,9.8,9.5,9.6,10.3)
MillerTest_5.2(X,Y)   # once you have written the function, it should return:
#$Abar
#[1] 0.7522871
#$Bbar
#[1] -1.385421
#$V_A
#[1] 0.1139820
#$V_B
#[1] 2.366502
#$Q
#[1] 1.357314

######### Test for either location or dispersion, Section 5.3
### Programming the Lepage test statistic is one of your homework problems.

########################################### Kolmogorov-Smirnov test
### WARNING 1: R calculates the quantity D = max_i |F_m(Z_i) - G_n(Z_i)|,
### namely it does not take into account the prefactor mn/d. So you should be
### careful when converting things to the results of the book. The p-value that
### R gives is correct for this particular formulation.
### WARNING 2: in the two-sample case, alternative="greater" includes distributions
### for which x is stochastically smaller than y (the CDF of x lies above, and hence
### to the left of, that of y), in contrast to wilcox.test.
### If you want wilcox.test(x, y, alternative = "less"), then its counterpart is
### ks.test(x, y, alternative = "greater"). So, if you know what is going on, you will be fine.
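## A small illustration of WARNING 1 (a sketch, not in the book's code): R's two-sample D can be
## reproduced by evaluating the two empirical CDFs on the pooled sample; x and y below are the
## Table 4.1 samples defined above.
pooled <- c(x, y)
max(abs(ecdf(x)(pooled) - ecdf(y)(pooled)))
# this is the D that ks.test(x, y) would report; multiplying by mn/d (d = greatest common divisor of m and n)
# gives the statistic in the book's formulation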
Ramsay <- c(111, 107, 100, 99, 102, 106, 109, 108, 104, 99, 101, 96, 97, 102, 107, 113, 116, 113, 110, 98)
Jung.Parekh <- c(107, 108, 106, 98, 105, 103, 110, 105, 104, 100, 96, 108, 103, 104, 114, 114, 113, 108, 106, 99)
plot(ecdf(Ramsay), do.points=FALSE, verticals=TRUE, xlim = c(90, 120))
lines(ecdf(Jung.Parekh), do.points=FALSE, verticals=TRUE, col.h="red", col.v="red")
ks.test(Ramsay, Jung.Parekh)
#        Two-sample Kolmogorov-Smirnov test
#data:  Ramsay and Jung.Parekh
#D = 0.25, p-value = 0.5596
#alternative hypothesis: two-sided
#Warning message:
#In ks.test(Ramsay, Jung.Parekh) : cannot compute correct p-values with ties
help("ks.test")
ks.test(Ramsay, Jung.Parekh, exact=TRUE)
#        Two-sample Kolmogorov-Smirnov test
#data:  Ramsay and Jung.Parekh
#D = 0.25, p-value = 0.5596
#alternative hypothesis: two-sided
#Warning message:
#In ks.test(Ramsay, Jung.Parekh, exact = TRUE) :
#  cannot compute correct p-values with ties
ks.test(Ramsay, Jung.Parekh, alternative = c("less"), exact=TRUE)
#        Two-sample Kolmogorov-Smirnov test
#data:  Ramsay and Jung.Parekh
#D^- = 0.1, p-value = 0.8187
#alternative hypothesis: the CDF of x lies below that of y
#Warning message:
#In ks.test(Ramsay, Jung.Parekh, alternative = c("less"), exact = TRUE) :
#  cannot compute correct p-values with ties
ks.test(Ramsay, Jung.Parekh, alternative = c("greater"), exact=TRUE)
#        Two-sample Kolmogorov-Smirnov test
#data:  Ramsay and Jung.Parekh
#D^+ = 0.25, p-value = 0.2865
#alternative hypothesis: the CDF of x lies above that of y
#Warning message:
#In ks.test(Ramsay, Jung.Parekh, alternative = c("greater"), exact = TRUE) :
#  cannot compute correct p-values with ties

######################################################################################################################
######################################################################################################################
####### Chapter 6: The One-Way Layout
### Kruskal-Wallis test for general alternatives. Data are from Table 6.1.
normal <- c(2.9, 3.0, 2.5, 2.6, 3.2)       # normal subjects
oadisease <- c(3.8, 2.7, 4.0, 2.4)         # subjects with obstructive airway disease
asbestosis <- c(2.8, 3.4, 3.7, 2.2, 2.0)   # subjects with asbestosis
kruskal.test(list(normal, oadisease, asbestosis))
#        Kruskal-Wallis rank sum test
#data:  list(normal, oadisease, asbestosis)
#Kruskal-Wallis chi-squared = 0.7714, df = 2, p-value = 0.68
## It works fine with ties!!!!!! Good news!!!!
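## To make the statistic concrete (a sketch, not in the book's code): the Kruskal-Wallis statistic
## H = 12/(N(N+1)) * sum_j R_j^2/n_j - 3(N+1) computed by hand (there are no ties in this example).
groups <- list(normal, oadisease, asbestosis)
nj <- lengths(groups)                                # group sizes
N <- sum(nj)
rk <- rank(unlist(groups))                           # joint ranking of all N observations
Rj <- tapply(rk, rep(seq_along(groups), nj), sum)    # rank sum of each group
H <- 12/(N*(N+1)) * sum(Rj^2 / nj) - 3*(N+1)
H
1 - pchisq(H, df = length(groups) - 1)
# these should reproduce the chi-squared 0.7714 and p-value 0.68 reported by kruskal.test above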
######################################################################################################################
######################################################################################################################
## Chapter 7: The Two-Way Layout
### Friedman test for general alternatives in a randomized complete block design.
### This is Section 7.1; data are from Table 7.1.
### Comparison of three methods ("round out", "narrow angle", and "wide angle") for rounding
### first base. For each of 22 players and each of the three methods, the average time of two runs
### from a point on the first base line 35ft from home plate to a point 15ft short of second base is recorded.
RoundingTimes <- matrix(c(5.40, 5.50, 5.55,
                          5.85, 5.70, 5.75,
                          5.20, 5.60, 5.50,
                          5.55, 5.50, 5.40,
                          5.90, 5.85, 5.70,
                          5.45, 5.55, 5.60,
                          5.40, 5.40, 5.35,
                          5.45, 5.50, 5.35,
                          5.25, 5.15, 5.00,
                          5.85, 5.80, 5.70,
                          5.25, 5.20, 5.10,
                          5.65, 5.55, 5.45,
                          5.60, 5.35, 5.45,
                          5.05, 5.00, 4.95,
                          5.50, 5.50, 5.40,
                          5.45, 5.55, 5.50,
                          5.55, 5.55, 5.35,
                          5.45, 5.50, 5.55,
                          5.50, 5.45, 5.25,
                          5.65, 5.60, 5.40,
                          5.70, 5.65, 5.55,
                          6.30, 6.30, 6.25),
                        nrow = 22, byrow = TRUE,
                        dimnames = list(1 : 22, c("Round Out", "Narrow Angle", "Wide Angle")))
friedman.test(RoundingTimes)
#        Friedman rank sum test
#data:  RoundingTimes
#Friedman chi-squared = 11.1429, df = 2, p-value = 0.003805
## Strong evidence against the null hypothesis that the methods are equivalent with respect to speed.

######################################################################################################################
######################################################################################################################
# Chapter 8: The Independence Problem

##### Kendall test for independence based on signs. Data are from Table 8.1. This is Sections 8.1 and 8.2.
Hunter <- c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)
Panel <- c( 2.6, 3.1, 2.5, 5.0, 3.6, 4.0, 5.2, 2.8, 3.8)
plot(Hunter, Panel)
cor(Hunter, Panel, method="kendall")   ## Kendall point estimate, Section 8.2
#[1] 0.4444444
pos <- 0
neg <- 0
n<-length(Hunter)
for (i in 1:(n-1))
  for (j in (i+1):n) {
    if ((Hunter[j]-Hunter[i])*(Panel[j]-Panel[i]) > 0) pos <- pos+1 else neg <- neg+1
  }
pos
#[1] 26
neg
#[1] 10
2*(pos-neg)/(n*(n-1))
#[1] 0.4444444
cor.test(Hunter, Panel, method="kendall", alt="greater")   ## Kendall test for independence based on signs
#        Kendall's rank correlation tau
#data:  Hunter and Panel
#T = 26, p-value = 0.05972
#alternative hypothesis: true tau is greater than 0
#sample estimates:
#      tau
#0.4444444

######################## Large sample approximation
cor.test(Hunter, Panel, method = "kendall", alternative = "greater", exact=FALSE)
#        Kendall's rank correlation tau
#data:  Hunter and Panel
#z = 1.6681, p-value = 0.04765
#alternative hypothesis: true tau is greater than 0
#sample estimates:
#      tau
#0.4444444

##### Distribution-free confidence interval based on the Kendall statistic, Section 8.3 #######################
### Programming it is a HW problem.

################### Remark: To get the results of the book you need to load the package Kendall.
################### In particular we get
library(Kendall)
summary(Kendall(Hunter,Panel))
#Score =  16 , Var(Score) = 92
#denominator =  36
#tau = 0.444, 2-sided pvalue = 0.11785
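## A by-hand version of the large sample approximation above (a sketch, not in the book's code):
## standardize the Kendall score K = pos - neg by its null standard deviation sqrt(n(n-1)(2n+5)/18).
K <- pos - neg                 # this is the Score = 16 reported by the Kendall package
VarK <- n*(n-1)*(2*n+5)/18     # null variance of K; equals 92 for n = 9
K / sqrt(VarK)
1 - pnorm(K / sqrt(VarK))
# these should reproduce the z = 1.6681 and p-value 0.04765 reported by cor.test(..., exact = FALSE) above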
##########################################################
## Spearman test for independence based on ranks. Data are from Table 8.1. This is Section 8.5.
Hunter <- c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)
Panel <- c( 2.6, 3.1, 2.5, 5.0, 3.6, 4.0, 5.2, 2.8, 3.8)
plot(Hunter, Panel)
cor(Hunter, Panel)                     # Pearson's r
#[1] 0.5711816
cor(Hunter, Panel, method="spearman")  # Spearman's rho
#[1] 0.6
H<-rank(Hunter)
P<-rank(Panel)
cbind(H, P)
#      H P
# [1,] 3 2
# [2,] 6 4
# [3,] 1 1
# [4,] 8 8
# [5,] 4 5
# [6,] 2 7
# [7,] 7 9
# [8,] 5 3
# [9,] 9 6
plot(H, P)
cor(H, P)
#[1] 0.6
S <- sum((H - P)^2)
S
#[1] 48
n <- length(Hunter)
1 - 6*S/(n*(n^2-1))                    # Spearman's rho computed from S
#[1] 0.6
cor.test(Hunter, Panel, method="spearman", alt="greater")
#        Spearman's rank correlation rho
#data:  Hunter and Panel
#S = 48, p-value = 0.0484
#alternative hypothesis: true rho is greater than 0
#sample estimates:
#rho
#0.6

########################################################################################################################
########################################################################################################################
# Chapter 9: Regression Problems

### Theil test for the slope of the regression line. Data are from Table 9.1. This is Sections 9.1-9.3.
### We test beta = b.
x <- c(1,2,3,4,5)
Y <- c(1.26, 1.27, 1.12, 1.16, 1.03)
b<-0
D<-Y-b*x
n<-length(x)
cor(x, D, method="kendall")
# [1] -0.6
pos <- 0
neg <- 0
for (i in 1:(n-1))
  for (j in (i+1):n) {
    if ((x[j]-x[i])*(D[j]-D[i]) > 0) pos <- pos+1 else neg <- neg+1
  }
pos
#[1] 2
neg
#[1] 8
2*(pos-neg)/(n*(n-1))
# [1] -0.6

##################### Remark: To get the results of the book you need to load the package Kendall. In particular we get
library(Kendall)
summary(Kendall(x,D))
#Score =  -6 , Var(Score) = 16.66667
#denominator =  10
#tau = -0.6, 2-sided pvalue = 0.22067

### A slope estimator associated with the Theil statistic, Section 9.2.
### Programming it is a HW problem. Once written, the estimator should be
BetaHat
# [1] -0.05625

##################### Large sample approximation
cor.test(x, D, method = "kendall", alternative = "greater", exact=FALSE)
#        Kendall's rank correlation tau
#data:  x and D
#z = -1.4697, p-value = 0.9292
#alternative hypothesis: true tau is greater than 0
#sample estimates:
#   tau
#  -0.6
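## The same standardization as in the Chapter 8 check can be repeated here (a sketch, not in the book's code):
## with pos = 2 and neg = 8 the score is -6, and its null variance is n(n-1)(2n+5)/18 = 16.66667 for n = 5.
(pos - neg) / sqrt(n*(n-1)*(2*n+5)/18)
# this should reproduce the z = -1.4697 reported by cor.test above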