### Some basic commands in R
### Course: APMA168 - Nonparametric Statistics, Brown University
### Instructor: Konstantinos Spiliopoulos
###
### Tutorial script: run it top to bottom, ideally one statement at a time.
### Comment lines of the form "#[1] ..." show what R prints for the statement
### just above them (in a fresh session). Output of the random-number
### generators is only an illustration; your numbers will differ.

###############################################################################
### Basics
###############################################################################

x <- c(1, 2, 3, 4, 5)
y <- c(3, 5, 6, 7, 8, 9)
y
#[1] 3 5 6 7 8 9

y1 <- y[y != 7]  # the vector of elements of y, excluding those with value 7
y1
#[1] 3 5 6 8 9

sin(y)  # applies the sin function to each element of the vector y
#[1]  0.1411200 -0.9589243 -0.2794155  0.6569866  0.9893582  0.4121185

z <- c(x, y)  # combines the two vectors into one

x[2]  # extracts the 2nd element of the vector x
#[1] 2

mean(x)  # the mean value of the vector x
#[1] 3

sd(x)  # the standard deviation of the vector x
#[1] 1.581139

median(y)  # the median of the vector y
#[1] 6.5

objects()  # lists the objects that you have defined so far
#[1] "x"  "y"  "y1" "z"

hist(x)  # plots the histogram of the vector x

# Creates a sequence of numbers from 1 to 3 with step 0.2.
# (The variable name shadows base::sequence(); calls to the function still
# work because R looks up functions separately from other objects.)
sequence <- seq(1, 3, by = 0.2)
#[1] 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0

###############################################################################
### Doing tests
###############################################################################

# Tests the null hypothesis mu = 5 against the alternative mu > 5.
t.test(x, alternative = "greater", mu = 5)
#        One Sample t-test
#
#data:  x
#t = -2.8284, df = 4, p-value = 0.9763
#alternative hypothesis: true mean is greater than 5
#95 percent confidence interval:
# 1.492557      Inf
#sample estimates:
#mean of x
#        3

# Besides just printing the result of a function, we can store it somewhere.
# For example:
tx <- t.test(x, alternative = "greater", mu = 5)

attributes(tx)  # shows the various properties of tx. For example:
#$names
#[1] "statistic"   "parameter"   "p.value"     "conf.int"    "estimate"
#[6] "null.value"  "alternative" "method"      "data.name"
#
#$class
#[1] "htest"

tx$p.value  # returns the p-value
#[1] 0.9762897

###############################################################################
### Generating random variables
###############################################################################

# Examples of generating random numbers following various discrete and
# continuous distributions.

normal <- rnorm(12, mean = 5, sd = 10)
# [1]  16.4273175  13.6126756  -2.7347741   8.6122029 -16.7411542   1.4438525
# [7]  -2.9433208   7.0977609  -3.1812454  26.7573497   8.5117967  -0.5879782

chisquare <- rchisq(200, df = 4)

# Both calls give P(X < 30) when X is normal with mean 50 and standard
# deviation 20.
# Remark: R wants the standard deviation as the parameter, not the variance,
# so starting from a variance of 400 we need to take a square root!
pnorm(30, mean = 50, sd = sqrt(400))
pnorm(30, 50, 20)
#[1] 0.1586553    Namely P(X < 30) = 0.1586553

# The 0.95 quantile (95th percentile) of the normal distribution with mean 100
# and standard deviation 15.
qnorm(0.95, mean = 100, sd = 15)
#[1] 124.6728

### A nice example is the following:
x <- rnorm(1000, mean = 50, sd = 20)  # 1000 i.i.d. normal random numbers
hist(x, freq = FALSE)                 # plots their histogram (density scale)
# Graphs the p.d.f. of the same normal distribution on top of the histogram.
# Inside curve(), `x` is the plotting variable of curve() itself, not the data
# vector x defined above.
curve(dnorm(x, mean = 50, sd = 20), add = TRUE)

### Similarly for Binomial, Poisson, Exponential, Gamma, Beta, etc.
### Check out the R manual and help!

###############################################################################
### Some graphing commands
###############################################################################

plot(x)
plot(sin(x))

# One of the most useful commands is help("anything you want"). For example:
help("plot")

## Moreover, R supports for loops, if commands, etc.

## A nice way of creating plots and saving them somewhere on your computer is
## the following. In the example that follows we assume that we want to save
## our figure in
##   C:\Documents and Settings\Kostas\Desktop
## with name Figure1 and type .eps. Change the path to a folder that exists on
## your own machine. The example also shows how one can put multiple graphs in
## one figure.
x <- seq(-4, 4, by = 0.01)
postscript(
  "C:\\Documents and Settings\\Kostas\\Desktop\\Figure1.eps",
  horizontal = FALSE, paper = "special", height = 8, width = 9, pointsize = 12
)
plot(
  x, (1 / 2) * x^2,
  xlim = c(-2, 2), ylim = c(0, 3),
  xlab = "x", ylab = " V(x)", type = "l", lty = 1, col = "black"
)
lines(
  x, (1 / 2) * x^2 + 0.1 * (cos(x / 0.01) + sin(x / 0.01)),
  type = "l", lty = 1, col = "black"
)
dev.off()  # closes the file; nothing is written completely until this runs

###############################################################################
### Defining your own functions in R
###############################################################################

# The following command defines a new function called trial1 that takes as
# input a vector x and returns a new vector with the elements squared.
trial1 <- function(x) {
  x1 <- x^2
  x1  # the value of the last expression is what the function returns
}

#### An application:
y
#[1] 3 5 6 7 8 9
trial1(y)
#[1]  9 25 36 49 64 81

## However, x1 is defined as a local variable. This means that it is
## recognized only inside the function trial1. For example, if we type x1 at
## the prompt we get an error message:
# > x1
#Error: object 'x1' not found

###############################################################################
### For loops and if statements in R
###############################################################################

# A very useful command is the for loop:
for (i in 1:10000) {
  ### some R statements that do something here
}
# does the same thing (whatever is done by the R statements inside) 10000
# times.

# An example. Suppose we want to simulate the mean of a random sample of
# size m from the standard normal distribution, n times.
m <- 200   # size of each sample
n <- 1000  # number of simulated samples
MuHat <- double(n)  # preallocate the vector of sample means
for (j in seq_len(n)) {
  x <- rnorm(m)
  MuHat[j] <- mean(x)
}
hist(MuHat, freq = FALSE)
# The mean of m i.i.d. N(0, 1) variables is N(0, 1 / m); overlay its density.
curve(dnorm(x, sd = sqrt(1 / m)), add = TRUE)

### An example that combines a for loop and an if / else if / else statement.
### (The same result can be obtained without a loop, e.g. with
### findInterval(w, c(5, 12), left.open = TRUE) + 1.)
w <- seq(1, 20, by = 0.2)
group <- numeric(length(w))  # preallocate instead of growing inside the loop
for (i in seq_along(w)) {
  group[i] <- if (w[i] <= 5) {
    1
  } else if (w[i] <= 12) {
    2
  } else {
    3
  }
}
group
# [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#[39] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
#[77] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3

###############################################################################
# Practice yourself and use the R manual and help as much as possible!