DECIPHER - R Lesson #12

R Lesson #12 - Loops

This lesson describes for and while loops, which can be used to repeatedly apply a block of code. In R, for loops iterate through a vector - typically an index. In contrast, while loops repeat until a condition is FALSE.

There are three main reasons to prefer a loop over an apply function: (1) When subsets are dependent (i.e., iteration i + 1 depends on i), (2) When modifying part of a structure in place, and (3) With tasks that might terminate before completion.

Hide output

# basic constructors:
> for (i in seq_len(n)) {
+    # do something
+ }
> 
> while (condition) { # execute if TRUE
+    # do something
+ }
> 
> for (i in seq_len(10)) {
+    print(i)
+ }
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
> 
> i <- 0L
> while (i < 10) {
+    i <- i + 1L
+    print(i)
+ }
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10

When initializing a for loop, seq_along and seq_len are preferable to using seq or 1:n, because they properly handle the case when n is zero.

seq(1, 10)
 [1]  1  2  3  4  5  6  7  8  9 10
> 1:10
 [1]  1  2  3  4  5  6  7  8  9 10
> seq_len(10)
 [1]  1  2  3  4  5  6  7  8  9 10
> seq_along(1:10)
 [1]  1  2  3  4  5  6  7  8  9 10
> 
> a <- sample(10, 100, replace=TRUE)
> w <- which(a==5)
> seq(1, length(w))
[1] 1 2 3 4 5 6 7 8 9
> 1:length(w)
[1] 1 2 3 4 5 6 7 8 9
> for (i in 1:length(w)) {
+    cat("a[", w[i], "] = ", a[w[i]], "\n", sep="")
+ }
a[7] = 5
a[17] = 5
a[22] = 5
a[24] = 5
a[29] = 5
a[35] = 5
a[70] = 5
a[78] = 5
a[87] = 5
> seq_along(w)
[1] 1 2 3 4 5 6 7 8 9
> seq_len(length(w))
[1] 1 2 3 4 5 6 7 8 9
> for (i in seq_along(w)) {
+    cat("a[", w[i], "] = ", a[w[i]], "\n", sep="")
+ }
a[7] = 5
a[17] = 5
a[22] = 5
a[24] = 5
a[29] = 5
a[35] = 5
a[70] = 5
a[78] = 5
a[87] = 5
> 
> # seq and 1:n fail when n is an empty set:
> w <- which(a==0)
> seq(1, length(w))
[1] 1 0
> 1:length(w)
[1] 1 0
> for (i in 1:length(w)) {
+    cat("a[", w[i], "] = ", a[w[i]], "\n", sep="")
+ }
a[NA] = NA
a[] = 
> # seq_along and seq_len give an empty set:
> seq_along(w)
integer(0)
> seq_len(length(w))
integer(0)
> for (i in seq_along(w)) { # never executes
+    cat("a[", w[i], "] = ", a[w[i]], "\n", sep="")
+ }

Example: calculating the cumulative sum of a vector. The sum at any index in the resulting vector is the sum of all elements in the original vector up to that point.

y <- -10:10
> cumsum(y)
 [1]  -10 -19 -27 -34 -40 -45 -49 -52 -54 -55 -55 -54
 [13] -52 -49 -45 -40 -34 -27 -19 -10   0
> 
> result <- integer(length(y))
> for (i in seq_along(y)) {
+    if (i==1) {
+       result[i] <- y[i]
+    } else {
+       result[i] <- y[i] + result[i - 1]
+    }
+ }
> result
 [1]  -10 -19 -27 -34 -40 -45 -49 -52 -54 -55 -55 -54
 [13] -52 -49 -45 -40 -34 -27 -19 -10   0

A loop is useful for calculating an exponentially weighted moving average, where each iteration is dependent on the past iterations. See Wikipedia for an explanation of the formulas below.

# example - exponentially weighted moving average
> x <- rnorm(100L) # input vector
> s <- numeric(length(x)) # initialize smoothed vector
> a <- 0.1 # alpha (smoothing parameter)
> 
> for (i in seq_along(x)) {
+    if (i == 1) {
+       s[i] <- x[i]
+    } else {
+       s[i] <- a*x[i] + (1 - a)*s[i - 1]
+    }
+ }
> 
> plot(x, type="l",
+    ylab="value")
> points(s, type="l",
+    col="blue")
> legend("topleft",
+    c("x", "s"),
+    lty=1, bg="white",
+    col=c("black", "blue"))
> 
> # the same task can be done with a while loop:
> i <- 0L
> while (i < length(x)) {
+    i <- i + 1L # same as the for loop iterator
+    if (i == 1) {
+       s[i] <- x[i]
+    } else {
+       s[i] <- a*x[i] + (1 - a)*s[i - 1]
+    }
+ }
> points(s, col="red", type="l", lty=2)

All for loops can be easily converted into while loops but the opposite is not as simple. To convert a while loop into a for loop it may be necessary to use control-flow statements, such as next and break. The next statement will jump to the next iteration, and the break statement will jump out of the current loop.

# example - find the first occurrence > 2
> i <- 1L
> while (x[i] < 2) {
+    i <- i + 1L
+ }
> print(i)
[1] 19
> print(x[i])
[1] 2.34534
> 
> # to accomplish this using a for loop it is
> # necessary to use a control flow statement
> for (i in seq_along(x)) {
+    if (x[i] >= 2)
+       break # jump out of the loop
+ }
> print(i)
[1] 19
> print(x[i])
[1] 2.34534
> 
> it is also possible to loop over the elements
> without using any index directly
> for (i in x) { # note: no seq_along
+    if (i >= 2)
+       break
+ }
> print(i)
[1] 2.34534
> # but now `i` means something different:
> print(x[i]) # `i` gets truncated into an index!
[1] -1.214965
> 
> # an alternative control-flow statement is `next`
> I <- 0 # uppercase `i`
> for (i in seq_along(x)) {
+    if (x[i] < 2)
+       next # jump to the next iteration
+    
+    I <- i
+ }
> # this is an inefficient way of finding the last
> # element of x that meets the condition (>= 2)
> print(I)
[1] 90
> print(x[I])
[1] 2.225347

Note that the for loop is fixed from the beginning. This behavior differs between programming languages, but the looping vector cannot be altered in R after initialization.

# the iterator cannot be changed after initialization
> for (i in 1:10) {
+    print(i)
+ }
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
> for (i in 1:10) {
+    print(i)
+    i <- 5
+ }
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
> 
> z <- 1:10
> for (i in z) {
+    z[] <- 5 # all elements of z are now 5
+    print(i) # but the original z is still printed
+ }
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
> # the loop keeps going as if z was not changed
> z # note that z was changed outside the loop!
 [1] 5 5 5 5 5 5 5 5 5 5
> # what happens inside the loop does not stay inside

It is also easy to create an infinite loop. If this happens, it is sometimes possible to exit by pressing 'control' + 'c' or the 'ESC' key.

# an infinite loop
> #while (TRUE) {
>    # do something
> #}

Just to reiterate (no pun intended), it is important to initialize the total amount of memory that will be required. The small example below reveals the big difference in speed that memory initialization makes.

reps <- 20000
> a <- numeric()
> system.time({
+    for (i in 1:reps) {
+       a <- c(a, i)
+    }
+ })
   user  system elapsed 
  0.864   0.578   1.446 
> b <- numeric(reps)
> system.time({
+    for (i in 1:reps) {
+       b[i] <- i
+    }
+ })
   user  system elapsed 
  0.042   0.004   0.044

The next example is of robust regression using a while loop. Here, points are weighted by the inverse of their residuals, which are iteratively calculated. The plot shows how the trendline adapts on each iteration in order to find the best fit line.

y <- x <- 1.7^(1:10)
> y[10] <- 1 # extreme outlier
> plot(x, y)
> l <- lm(y~x)
> Y <- coef(l)[1] + coef(l)[2]*x
> points(x, Y, type="l")
> 
> residuals <- l$residuals
> delta <- Inf
> cols <- rainbow(50)
> count <- 0L
> while(delta > 0.01) {
+    # linear regression
+    l <- lm(y~x,
+       weights=abs(residuals)^-1)
+    
+    # find change in residuals
+    delta <- sum(abs(residuals - l$residuals))
+    residuals <- l$residuals
+    
+    # plot the line
+    count <- count + 1L
+    Y <- coef(l)[1] + coef(l)[2]*x
+    points(x, Y, type="l", col=cols[count])
+ }
> count
[1] 45
> coef(l)
 (Intercept)            x 
0.0006922342 0.9999176688

It is also possible to nest loops. Conventionally, the first loop iterator is named 'i', the second 'j', and the third 'k'.

d <- matrix(nrow=10, ncol=5)
> for (i in seq_len(dim(d)[1])) {
+    for (j in seq_len(dim(d)[2])) {
+       d[i, j] <- i*j
+    }
+ }
> d
      [,1] [,2] [,3] [,4] [,5]
 [1,]    1    2    3    4    5
 [2,]    2    4    6    8   10
 [3,]    3    6    9   12   15
 [4,]    4    8   12   16   20
 [5,]    5   10   15   20   25
 [6,]    6   12   18   24   30
 [7,]    7   14   21   28   35
 [8,]    8   16   24   32   40
 [9,]    9   18   27   36   45
[10,]   10   20   30   40   50
> outer(1:10, 1:5, "*") # equilvalent
      [,1] [,2] [,3] [,4] [,5]
 [1,]    1    2    3    4    5
 [2,]    2    4    6    8   10
 [3,]    3    6    9   12   15
 [4,]    4    8   12   16   20
 [5,]    5   10   15   20   25
 [6,]    6   12   18   24   30
 [7,]    7   14   21   28   35
 [8,]    8   16   24   32   40
 [9,]    9   18   27   36   45
[10,]   10   20   30   40   50

Finally, lots of times it is nice to have a progress bar if the loop is slow.

pBar <- txtProgressBar(style=3, max=100)
  |                                            |   0%
> for (i in seq_len(100)) {
+    Sys.sleep(0.05) # pause for 1/20 seconds
+    setTxtProgressBar(pBar, i)
+ }
  |============================================| 100%
> close(pBar)

< Previous Lesson Next Lesson >