DECIPHER - R Lesson #9

R Lesson #9 - Plotting curves and lines

This lesson illustrates the plotting of points, lines, and curves. First, spirographs are generated and plotted, followed by plots of built-in datasets.

Hide output

# Plotting spirographs
> t <- 1:1000
> k <- 0.1
> l <- 5
> x1 <- (1 - k)*cos(t) + l*k*cos(t*(1 - k)/k)
> y1 <- (1 - k)*sin(t) - l*k*sin(t*(1 - k)/k)
> k <- 4
> l <- 1
> x2 <- (1 - k)*cos(t) + l*k*cos(t*(1 - k)/k)
> y2 <- (1 - k)*sin(t) - l*k*sin(t*(1 - k)/k)
> 
> # plotting points
> plot(x1, y1) # basic x-y scatterplot
> plot(x1, y1, pch=1:25) # type of point
> ?points # options for different symbols
> plot(x1, y1, pch=1:25,
+    ylab="Y axis label", xlab="X axis label",
+    main="Title of plot")
> 
> # adding color, text, and lines
> plot(x1, y1,
+    asp=2, # aspect ratio
+    col=rainbow(100, s=0.8, v=0.8),
+    cex=0.5) # point size
> abline(a=0, b=1) # line y=x
> text(2.5, 1, "Some text.") # text in the plot
> mtext("Some margin text", padj=-1) # text in the margin
> axis(4, -1:1, col="red", col.axis="blue") # right axis
> 
> # plotting lines (type="l")
> # points (p), lines (l), both (b)
> plot(y2, x2, type="l",
+    col='red2',
+    xlab="", ylab="",
+    xaxt="n", yaxt="n", # no axes
+    bty="n", # no boundary box
+    main="Happy Valentine's Day!")
> 
> # specifying the axes' ranges
> plot(x1 + 1, y1, type="l",
+    col="gray",
+    xlim=range(x1, x2), ylim=range(y1, y2),
+    xlab="X position", ylab="Y position")
> # add points onto an existing plot
> points(x2, y2, type="l",
+    lty=2, # line type
+    lwd=2, # line width
+    col="darkgreen")
> arrows(-1, 0, 1, 0) # add arrows
> legend("topright", # placement (or x, y position)
+    c("equation #1", "equation #2"),
+    col=c("gray", "darkgreen"),
+    lty=1:2, lwd=1:2, bty="n")

Next, it is necessary to load the famous iris dataset, which contains information about three flower types. The data is stored in a data.frame, with character columns being stored as factors. Factors are a more efficient representation of repetitive character data: each value is stored as an integer code that maps to a level, with 1 = "Level1", 2 = "Level2", etc. When initializing a data.frame, it is possible to prevent characters from being converted to factors by setting the argument stringsAsFactors to FALSE (this is the default since R 4.0.0).

# see a list of all built-in datasets:
> data(package = .packages(all.available = TRUE))
> # iris is a famous dataset available in R
> data(iris)
> typeof(iris) # a data.frame is stored as a list
[1] "list"
> ?iris
> head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1          5.1         3.5          1.4         0.2
2          4.9         3.0          1.4         0.2
3          4.7         3.2          1.3         0.2
4          4.6         3.1          1.5         0.2
5          5.0         3.6          1.4         0.2
6          5.4         3.9          1.7         0.4
  Species
1  setosa
2  setosa
3  setosa
4  setosa
5  setosa
6  setosa
> typeof(iris$Species) # integer
[1] "integer"
> 
> # recall that data.frames are type list
> # so they can be accessed by name in two ways:
> head(iris[, "Species"])
[1] setosa setosa setosa setosa setosa setosa
Levels: setosa versicolor virginica
> head(iris$Species)
[1] setosa setosa setosa setosa setosa setosa
Levels: setosa versicolor virginica
> all(iris[, "Species"]==iris$Species)
[1] TRUE
> u <- unique(iris[, "Species"])
> u
[1] setosa     versicolor virginica 
Levels: setosa versicolor virginica
> as.character(u) # convert factor to character
[1] "setosa"     "versicolor" "virginica" 
> 
> # using the match function to specify colors
> # match returns the first index of x in table
> s <- sample(10)
> s
 [1]  4 10  9  5  3  7  6  8  2  1
> match(1:3, s)
[1] 10  9  5
> m <- match(iris[, "Species"], # each row
+    u) # the three unique species names
> cols <- c("green", "blue", "red") # color vector
> plot(iris[, "Petal.Length"],
+    iris[, "Petal.Width"],
+    col=cols[m],
+    xlab="Petal Length (cm)",
+    ylab="Petal Width (cm)")

Trendlines can be fitted by specifying a formula. For example, the formula for y = m*x + b is y ~ x, and the formula for y = m*x (no intercept) is y ~ x + 0.

> l <- lm(Petal.Width~Petal.Length,
+    data=iris)
> l


Call:
lm(formula = Petal.Width ~ Petal.Length, data = iris)


Coefficients:
 (Intercept)  Petal.Length  
     -0.3631        0.4158  


> summary(l)


Call:
lm(formula = Petal.Width ~ Petal.Length, data = iris)


Residuals:
     Min       1Q   Median       3Q      Max 
-0.56515 -0.12358 -0.01898  0.13288  0.64272 


Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept)  -0.363076   0.039762  -9.131  4.7e-16
Petal.Length  0.415755   0.009582  43.387  < 2e-16
                
(Intercept)  ***
Petal.Length ***
---
Signif. codes:  
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


Residual standard error: 0.2065 on 148 degrees of freedom
Multiple R-squared:  0.9271,   Adjusted R-squared:  0.9266 
F-statistic:  1882 on 1 and 148 DF,  p-value: < 2.2e-16


> summary(l)$r.squared
[1] 0.9271098
> coef(l)
 (Intercept) Petal.Length 
  -0.3630755    0.4157554 
> abline(l)
> 
> # LOcally Weighted RegrESSion (LOWESS)
> low <- lowess(iris[, "Petal.Length"], iris[, "Petal.Width"])
> str(low)
List of 2
 $ x: num [1:150] 1 1.1 1.2 1.2 1.3 1.3 1.3 1.3 1.3 1.3 ...
 $ y: num [1:150] 0.0689 0.1075 0.1462 0.1462 0.185 ...
> lines(low, col="orange")
> # a more complicated legend with points and lines
> legend("topleft", # placement (or x, y position)
+    legend=c(as.character(u), "Regression", "LOWESS"),
+    pch=c(rep(1, 3), rep(NA, 2)), # point type
+    lty=c(rep(NA, 3), rep(1, 2)), # line type
+    col=c(cols, "black", "orange")) # colors
> 
> # scatterplot matrix
> pairs(~Petal.Length+Petal.Width+Sepal.Length+Sepal.Width,
+    data=iris,
+    col=cols[m])

For the next session, it is necessary to download a dataset in tab-delimited format from here. This dataset contains the stock prices for several stocks during the year 2014. To figure out a filepath, drag the file into R and copy the path.

# set the correct path below
> r <- read.table("<<PATH TO Stocks2014.txt>>",
+    sep="\t", # tab delimited
+    header=TRUE, # first line is column names
+    stringsAsFactors=FALSE)
> head(r)
       Date   Open   High    Low  Close   Volume
1 30-Dec-14 113.64 113.92 112.11 112.52 29881477
2 29-Dec-14 113.79 114.77 113.70 113.91 27598920
3 26-Dec-14 112.10 114.52 112.01 113.99 33720951
4 24-Dec-14 112.58 112.71 112.01 112.01 14479611
5 23-Dec-14 113.23 113.33 112.46 112.54 26028419
6 22-Dec-14 112.16 113.49 111.97 112.94 45167549
  Ticker
1   AAPL
2   AAPL
3   AAPL
4   AAPL
5   AAPL
6   AAPL
> typeof(r)
[1] "list"
> class(r)
[1] "data.frame"
> t <- unique(r$Ticker)
> d <- unique(r$Date)
> s <- rev(seq_along(d)) # reverse order
> Day <- s[match(r$Date, d)]
> r <- cbind(r, TradingDay=Day)
> head(r)
       Date   Open   High    Low  Close   Volume
1 30-Dec-14 113.64 113.92 112.11 112.52 29881477
2 29-Dec-14 113.79 114.77 113.70 113.91 27598920
3 26-Dec-14 112.10 114.52 112.01 113.99 33720951
4 24-Dec-14 112.58 112.71 112.01 112.01 14479611
5 23-Dec-14 113.23 113.33 112.46 112.54 26028419
6 22-Dec-14 112.16 113.49 111.97 112.94 45167549
  Ticker TradingDay
1   AAPL        251
2   AAPL        250
3   AAPL        249
4   AAPL        248
5   AAPL        247
6   AAPL        246
> plot(Close~TradingDay,
+    data=r,
+    col=match(r$Ticker, t),
+    cex=0.3, log="y")
> legend(x=-10, y=max(r$Close)*1.5,
+    legend=t, col=seq_along(t),
+    ncol=length(t), # number of columns
+    pch=1, cex=0.8, bty="n",
+    xpd=TRUE) # do not crop outside plot

< Previous Lesson Next Lesson >