R Lesson #2 - Basic data types and operators
The four primary types in the R language are logicals, integers, doubles, and characters. Each of these types is stored differently in binary form on the computer. Unless forced to do otherwise, all numbers are stored as doubles (64 bits) in R.
> x <- "Hello, World!"
> x # prints x
[1] "Hello, World!"
>
> typeof(x)
[1] "character"
> class(x)
[1] "character"
> object.size(x)
104 bytes
>
> 1.2
[1] 1.2
> y <- 1.2
> y
[1] 1.2
> y = 1.2
> y
[1] 1.2
> print(y) # same as above
[1] 1.2
> # note that R hides the finite precision
> print(y, digits=22) # specifying another argument
[1] 1.199999999999999955591
>
> typeof(y)
[1] "double"
> class(y)
[1] "numeric"
> object.size(y)
48 bytes
> z <- c(1, 2, 3, 4, 5, 6) # `c` is the combine function
> z
[1] 1 2 3 4 5 6
> typeof(z)
[1] "double"
> object.size(z)
88 bytes
>
> # the overhead stays the same (40 bytes)
> object.size(z) - length(z)*8 # (8 bytes/number)
40 bytes
>
> z <- c(1L, 2L, 3L, 4L, 5L, 6L) # integer literals
> typeof(z)
[1] "integer"
>
> # integers require less memory to store
> # R currently stores each integer in 4 bytes
> object.size(z)
72 bytes
>
> z <- 1:5 # shorthand for integer sequence
> typeof(z) # still integer
[1] "integer"
> z <- seq(1, 5)
> typeof(z)
[1] "integer"
> z <- seq(1, 5, by=1) # setting the `by` argument
> typeof(z)
[1] "double"
> z <- seq(1L, 5L, by=1L)
> typeof(z)
[1] "integer"
> z <- seq(1, 5, length.out=5) # setting `length.out`
> typeof(z)
[1] "double"
> z <- seq(1L, 5L, length.out=5L)
> typeof(z)
[1] "double"
>
> s <- sum(z)
> typeof(s) # keeps the same type as z
[1] "double"
>
> sum(1:100000) # Warning: too big of an integer!
[1] NA
Warning message:
In sum(1:1e+05) : Integer overflow - use sum(as.numeric(.))
> # the biggest possible signed integer in R is (2^31 - 1)
> as.integer(2^31 - 1)
[1] 2147483647
> as.integer(2^31) # NA
[1] NA
Warning message:
NAs introduced by coercion to integer range
>
> # doubles can store larger numbers
> sum(as.numeric(1:100000))
[1] 5000050000
> a <- z + y
> a
[1] 2.2 3.2 4.2 5.2 6.2
> typeof(a)
[1] "double"
> class(a)
[1] "numeric"
>
> sum(a)
[1] 21
> max(a)
[1] 6.2
> min(a)
[1] 2.2
> print(a, digits=22)
[1] 2.200000000000000177636 3.200000000000000177636
[3] 4.200000000000000177636 5.200000000000000177636
[5] 6.200000000000000177636
> class(a) <- "foo" # the class can be anything
> print(a) # note the alternative behavior
[1] 2.2 3.2 4.2 5.2 6.2
attr(,"class")
[1] "foo"
> # change it back to the original class
> class(a) <- "numeric"
> print(a) # displays the original behavior
[1] 2.2 3.2 4.2 5.2 6.2
>
> # classes can be coerced with `as` functions
> as.numeric(a) # switches class (and therefore type)
[1] 2.2 3.2 4.2 5.2 6.2
> as(a, "numeric") # general purpose coercion
[1] 2.2 3.2 4.2 5.2 6.2
> as.character(a) # coercion to character
[1] "2.2" "3.2" "4.2" "5.2" "6.2"
>
> # objects can also be initialized
> integer() # a zero-length integer vector
integer(0)
> integer(10) # get a vector of 10 zeros
[1] 0 0 0 0 0 0 0 0 0 0
> character(2) # get a vector of two ""
[1] "" ""
> b <- numeric(100)
> object.size(b)/length(b) # approaching 8 bytes each
8.4 bytes
> # subsetting is performed with square brackets
> a[1]
[1] 2.2
> a[5]
[1] 6.2
> a[z]
[1] 2.2 3.2 4.2 5.2 6.2
> a[-1]
[1] 3.2 4.2 5.2 6.2
> a[-1] <- 5
> a # everything except the first element is now 5
[1] 2.2 5.0 5.0 5.0 5.0
> -z
[1] -1 -2 -3 -4 -5
> a[-z] # drop all elements
numeric(0)
>
> b # note the index is printed
[1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[25] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[49] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[73] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[97] 0 0 0 0
> b[1:3] <- 1
> b[length(b)] <- 10
> object.size(b)/length(b) # unchanged
8.4 bytes
>
> # head and tail are useful functions
> head(b) # the first 6 elements
[1] 1 1 1 0 0 0
> head(b, n=10)
[1] 1 1 1 0 0 0 0 0 0 0
> tail(b) # the last 6 elements
[1] 0 0 0 0 0 10
>
> # scientific notation is also possible
> b[(length(b) - 4):length(b)] <- 1e1
> tail(b)
[1] 0 10 10 10 10 10
> tail(b) <- 5 # Error! Cannot do this.
Error in tail(b) <- 5 : could not find function "tail<-"
> b[45:55] # look at the middle
[1] 0 0 0 0 0 0 0 0 0 0 0