###############################################################################
#
# INTRODUCTION TO R
#
# A walk-through of the basic R data objects (vectors, factors, lists and
# data frames) with small exercises and their solutions. The script is meant
# to be run statement by statement in an interactive session so that the
# value of every bare expression is printed.
#
###############################################################################

# calculator
(50 + 1.45) / 12.5

# assignment operators (all three forms are shown on purpose)
x = 945                         # works, but `<-` is preferred
y <- sin(0.47)^2 * sqrt(5)      # used the most by R users
y^2 -> z                        # rightward assignment

# to inspect the value of a variable simply type its name
x
y
z

# listing and deleting objects
ls()
rm(y)
rm(x, z)

# remove everything in the working environment
# (caution: this deletes every object in the current environment; it is shown
# here as a demonstration and should not be used in reusable scripts)
rm(list = ls())

#
# Vectors (the most basic data objects in R)
#

# creating vectors
v <- c(14, 7, 23.5, 76.2)
v

# generating a regular sequence of numbers
v <- 1:10
v

v <- seq(from = 5, to = 10, by = 2)
v

w <- rep(v, times = 2)
w

# scalars are vectors with a single element
w <- 45.0
w

# vectors can be created using other vectors
z <- c(v, 2.5, w)
z

###############################################################################
#
# PROBLEMS:
#
# - construct a vector that contains elements: 1,2,3,...,19,20
#
# - construct a vector that contains elements: 1,2,3,...,19,20,19,...,3,2,1
#
# - construct a vector that contains elements: 1,3,5,1,3,5,...,1,3,5
#   where there are 10 occurrences of element 5
#
###############################################################################

###############################################################################
#
# SOLUTIONS:
#
###############################################################################

v <- 1:20
v

v <- c(1:20, 19:1)
v

v <- rep(c(1, 3, 5), times = 10)
v

###############################################################################

v <- c(8, 4, 2, 3, 6, 9, 1)

# some useful vector functions
length(v)
max(v)
min(v)
which.min(v)
sum(v)
mean(v)
sd(v)
rev(v)
sort(v)
sort(v, decreasing = TRUE)
order(v)

# types of vectors
mode(v)

# logical vector - has logical constants as elements
# (T and F are abbreviations for TRUE and FALSE; they are ordinary variables
# that can be reassigned, so the spelled-out constants are preferred)
b <- c(TRUE, FALSE, F, T)
b
mode(b)

x <- 5 > 3
x
mode(x)

# string vector - has strings as elements
s <- c("character", "logical", "numeric", "complex")
mode(s)

# type coercion (all elements must be of the same type)
x <- c(FALSE, TRUE, 34.56, "aaa")
x

#
# Vectorization
#

# vector arithmetic (operations are performed element-wise)
v1 <- c(10, 20, 30, 40)
v2 <- 1:4
v1 + v2
v1 * v2

# functions operate directly on each element of a vector
v1^2
sqrt(v1)
exp(v1)
log2(v1)

# the recycling rule
# (if lengths are different the elements of the shorter vector are repeated)
v1 * 10
v1 + 1
v1 + c(100, 200)

###############################################################################
#
# PROBLEMS:
#
# - calculate the values of sin(x) at 0, 0.1, 0.2, 0.3, ..., 1.0
#
# - Suppose we measure the height and weight of ten individuals:
#
#     # the vector of heights in 'cm'
#     height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
#
#     # the vector of weights in 'kg'
#     weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
#
#   Calculate the body mass index (bmi) for each individual using the formula:
#     bmi = weight_in_kg / (height_in_m)^2
#
#   HINT: first convert heights from 'cm' to 'm', then use the formula above.
#
###############################################################################

###############################################################################
#
# SOLUTIONS:
#
###############################################################################

v <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
# or
v <- (0:10) / 10
# or
v <- seq(from = 0, to = 1, by = 0.1)
sin(v)

height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
# height is given in cm, so divide by 100 to obtain metres
bmi <- weight / (height / 100)^2
bmi

###############################################################################

#
# Indexing
#

x <- c(-10, 20, -30, 40, -50, 60, -70, 80)
x

# individual elements can be addressed using an integer index vector
# (indexing starts with 1)
x[3]
x[c(1, 4, 5)]
x[1:3]
x[]

# negative integer indices address all elements but those stated
x[-1]
x[-c(4, 6)]
x[-(1:3)]

# vector elements can be addressed using logical vectors
# (elements corresponding to constants TRUE are selected)

# logical vector
x > 0

# logical vector indexing
x[x > 0]
x[x <= -20 | x > 50]
x[x > 40 & x < 100]

# equality operator is ==
# inequality operator is !=

# the which() function returns indices corresponding to constants TRUE
which(x > 0)

# character string index vector
point <- c(4.7, 3.6, 2.5)
names(point) <- c("x", "y", "z")
point
point["x"]
point[c("x", "z")]

# empty indices (every element is replaced, the names are kept)
point[] <- 0
point

# not the same as (the whole object is replaced by a single value)
point <- 0
point

#
# Vector editing
#

x <- c("a", "b", "c", "d")

# replacing an element
x[2] <- "BBBBB"
x
x[c(1, 3)] <- c("AAAAA", "CCCCC")
x

# adding new element
x[length(x) + 1] <- "EEEEE"
x

# what happens if we do not define all elements in the vector?
x[10] <- "FFFFF"
x

# which elements are not defined
is.na(x)

# removing elements
x <- x[-c(1, 3)]
x
x <- c(x[2], x[3])
x

###############################################################################
#
# PROBLEM:
#
# - given a vector:
#     x <- c(1, -2, 3, -4, 5, -6, 7, -8)
#
#   Edit the vector x as follows. Replace all elements with a negative value
#   with 0. Multiply the elements with a positive value by 10.
#
###############################################################################

###############################################################################
#
# SOLUTION:
#
###############################################################################

x <- c(1, -2, 3, -4, 5, -6, 7, -8)
x[x < 0] <- 0
x[x > 0] <- x[x > 0] * 10
x

###############################################################################

#
# Factors
#

gender <- c("f", "m", "m", "m", "f", "m", "f")
gender

# factors are useful when modelling nominal variables
gender <- factor(gender)
gender

# argument "levels" defines all possible elements' values
dir <- factor(c("left", "left", "up"),
              levels = c("left", "right", "up", "down"))
dir

# all possible elements' values
levels(dir)

# if no match is found
# (the element becomes NA and R issues a warning - this is intentional here)
dir[1] <- "diagonal"
dir

# valid assignment
dir[1] <- "down"
dir

# frequency tables for factors
table(gender)
table(dir)

#
# Lists (an ordered collection of objects - components)
#

# creating a list
student <- list(id = 12345, name = "Marko", marks = c(10, 9, 10, 9, 8, 10))
student

# extracting elements of a list (using named components)
student$id
student$name
student$marks

# extracting elements of a list (using indexing)
student[[1]]
student[[2]]
student[[3]]

# extending lists
student$parents <- c("Ana", "Tomaz")
student

#
# Data frames
#

# creating a data frame
# (note: `gender` and `student` are redefined here as columns of equal length)
height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
gender <- factor(c("f", "m", "m", "m", "f", "m", "f", "f", "m", "f"))
student <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE)

df <- data.frame(gender, height, weight, student)
df

# some important functions
summary(df)
names(df)
nrow(df)
ncol(df)

# accessing elements of data frames
df[5, ]
df[1:5, ]
df[, 1]
df[, c(1, 3, 4)]
df[1, 3]
df[1, -3]
df$height
df[df$height < 180, ]
df[df$gender == "m", ]

# adding columns to a data frame
df <- cbind(df, age = c(20, 21, 30, 25, 27, 19, 24, 27, 28, 24))
df

df$name <- c("Joan", "Tom", "John", "Mike", "Anna",
             "Bill", "Tina", "Beth", "Steve", "Kim")
df

summary(df)

###############################################################################
#
# PROBLEMS:
#
# - calculate the average age of persons in our dataset.
#   (HINT: use the mean function)
#
# - are there more males or females in our dataset?
#   (HINT: use the table function)
#
# - write out persons that are also students.
#
# - write out persons who are between 1.8 and 1.9 m tall (inclusive).
#
# - write out students who are over the average height
#   (calculated on the whole dataset).
#
# - arrange persons by their age. (HINT: use the order function)
#
###############################################################################

###############################################################################
#
# SOLUTIONS:
#
###############################################################################

mean(df$age)

table(df$gender)

df[df$student, ]

# heights are stored in cm, so 1.8 m - 1.9 m corresponds to 180 - 190
selection <- df$height >= 180 & df$height <= 190
df[selection, ]
# or directly
df[df$height >= 180 & df$height <= 190, ]

df[df$student & df$height > mean(df$height), ]

df[order(df$age), ]

###############################################################################