R Lesson #4 - Structured objects
So far only basic ('atomic') vectors have been described. Recall that there are four basic types that are very common: logical, integer, numeric, and character. This lesson delves into structures that have extended properties beyond basic vectors.
# some tricky R behavior
> x <- numeric(20) # initialize a length 20 numeric vector
> x[] # select all elements by leaving index empty
[1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
> x[integer()] # select no elements with zero length index
numeric(0)
> x[] <- 1:5 # fill x with 1:5 repeated (recycling)
> x
[1] 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5
> x <- 1:5 # replaces x
> x
[1] 1 2 3 4 5
> x[25] <- 200 # assigning past the end fills the gap (elements 6-24) with NA
> x
[1] 1 2 3 4 5 NA NA NA NA NA NA NA
[13] NA NA NA NA NA NA NA NA NA NA NA NA
[25] 200
> x + 5 # NA values propagate: NA + 5 is still NA
[1] 6 7 8 9 10 NA NA NA NA NA NA NA
[13] NA NA NA NA NA NA NA NA NA NA NA NA
[25] 205
> is.na(x)
[1] FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE
[9] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[17] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[25] FALSE
>
> x[length(x) + 1] <- 5 # this is very slow
> # it is better to initialize the required size
> x <- numeric(26) # then we can fill elements
> x[26] <- 5 # much faster
# structured class: matrix - two dimensions
> m <- matrix(0L)
> m
[,1]
[1,] 0
> typeof(m)
[1] "integer"
> class(m) # note: R >= 4.0.0 prints "matrix" "array" here and below
[1] "matrix"
> m <- matrix(1:4, nrow=2)
> m # R fills matrices in column-major order (down each column first)
[,1] [,2]
[1,] 1 3
[2,] 2 4
> m[3] # matrices can be accessed like vectors
[1] 3
> m <- matrix(1:4, nrow=2, byrow=TRUE)
> m # but you can force it to initialize by rows first
[,1] [,2]
[1,] 1 2
[2,] 3 4
> m[3] # linear index 3 is still position [1, 2], whatever the fill order
[1] 2
>
> m[1, 1] # select an element
[1] 1
> m[1,] # no column index means every column
[1] 1 2
> m[-1,] # select everything but the first row
[1] 3 4
> class(m[, -1]) # not a matrix anymore!
[1] "integer"
> m[, -1, drop=FALSE] # don't drop matrix class
[,1]
[1,] 2
[2,] 4
> class(m[, -1, drop=FALSE])
[1] "matrix"
> m[, 2]
[1] 2 4
>
> m <- matrix(1:10, nrow=5)
> dim(m) # dimensions of the matrix
[1] 5 2
> which(m==7)
[1] 7
> which(m==7, arr.ind=TRUE) # result is a matrix
row col
[1,] 2 2
> w <- which(m > 7 | m < 3)
> w
[1] 1 2 8 9 10
> m[w]
[1] 1 2 8 9 10
> which.max(m)
[1] 10
> which(m==max(m), arr.ind=T)
row col
[1,] 5 2
>
> # common functions for matrices:
> sum(m)
[1] 55
> rowSums(m)
[1] 7 9 11 13 15
> colSums(m)
[1] 15 40
> mean(m)
[1] 5.5
> rowMeans(m)
[1] 3.5 4.5 5.5 6.5 7.5
> colMeans(m)
[1] 3 8
> diag(m) # note this works with any rectangular matrix
[1] 1 7
# structured class: array - arbitrary dimensions
> a <- array(1:27, dim=c(3, 3, 3))
> a # note how the first dimension is filled first, then the second, then the third
, , 1
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
, , 2
[,1] [,2] [,3]
[1,] 10 13 16
[2,] 11 14 17
[3,] 12 15 18
, , 3
[,1] [,2] [,3]
[1,] 19 22 25
[2,] 20 23 26
[3,] 21 24 27
> a[1, 2, 3]
[1] 22
> a[1, 1,]
[1] 1 10 19
> class(a[1, 1,])
[1] "integer"
> class(a[1, 1,, drop=F])
[1] "array"
> a[1:2, 1:2, 1]
[,1] [,2]
[1,] 1 4
[2,] 2 5
> class(a[1:2, 1:2, 1]) # becomes a matrix
[1] "matrix"
> class(a[1:2, 1:2, 1, drop=F]) # still an array
[1] "array"
> colMeans(a) # returns a matrix
[,1] [,2] [,3]
[1,] 2 11 20
[2,] 5 14 23
[3,] 8 17 26
> rowSums(a)
[1] 117 126 135
> which(a==17, arr.ind=TRUE) # result is a matrix
dim1 dim2 dim3
[1,] 2 3 2
# structured class: list - data of different types
> l <- list() # initialize a list
> l # empty list
list()
> l[[1]] <- "hello" # note double brackets
> l
[[1]]
[1] "hello"
> l <- list(0L)
> l # not length zero!
[[1]]
[1] 0
> l[[2]] <- 10:12
> l
[[1]]
[1] 0
[[2]]
[1] 10 11 12
> l[[3]] <- "some characters"
> l # different data types
[[1]]
[1] 0
[[2]]
[1] 10 11 12
[[3]]
[1] "some characters"
>
> l[1] # select the first element as a list
[[1]]
[1] 0
> typeof(l[1])
[1] "list"
> l[[1]] # select contents of the first element
[1] 0
> typeof(l[[1]])
[1] "integer"
> l[[4]] <- m # keep adding to the list
> length(l) # number of list elements
[1] 4
> l[1:2]
[[1]]
[1] 0
[[2]]
[1] 10 11 12
> l[[1:2]] # cannot do this: [[ with a vector indexes recursively, i.e. l[[1]][[2]]
Error in l[[1:2]] : subscript out of bounds
> unlist(l) # coerced to one type
[1] "0" "10"
[3] "11" "12"
[5] "some characters" "1"
[7] "2" "3"
[9] "4" "5"
[11] "6" "7"
[13] "8" "9"
[15] "10"
>
> # lists can be nested with as many layers as desired
> l <- list(list(1), list(list(2)), list(list(3), list(4)))
> str(l) # look at the structure
List of 3
$ :List of 1
..$ : num 1
$ :List of 1
..$ :List of 1
.. ..$ : num 2
$ :List of 2
..$ :List of 1
.. ..$ : num 3
..$ :List of 1
.. ..$ : num 4
> l[[1]][[1]]
[1] 1
> l[[2]][[1]][[1]]
[1] 2
> l[[3]][[2]][[1]]
[1] 4
# structured class: data.frame - rectangular list
> X <- 1:10
> Y <- as.character(11:20)
> d <- data.frame(X, Y, stringsAsFactors=FALSE)
> typeof(d)
[1] "list"
> class(d)
[1] "data.frame"
> d[[2]]
[1] "11" "12" "13" "14" "15" "16" "17" "18" "19" "20"
> # can be accessed in the same manner as a matrix
> d[1, 2]
[1] "11"
> d[1,]
X Y
1 1 11
> typeof(d[1,])
[1] "list"
> # similar to a spreadsheet: one type per column
> typeof(d[, 1])
[1] "integer"
> typeof(d[, 2])
[1] "character"