R Lesson #9 - Plotting curves and lines
Plotting of points, lines, and curves is illustrated in this lesson. First, spirographs are generated for plotting, followed by the plotting of built-in datasets.
> # Plotting spirographs
> t <- 1:1000
> k <- 0.1
> l <- 5
> x1 <- (1 - k)*cos(t) + l*k*cos(t*(1 - k)/k)
> y1 <- (1 - k)*sin(t) - l*k*sin(t*(1 - k)/k)
> k <- 4
> l <- 1
> x2 <- (1 - k)*cos(t) + l*k*cos(t*(1 - k)/k)
> y2 <- (1 - k)*sin(t) - l*k*sin(t*(1 - k)/k)
>
> # plotting points
> plot(x1, y1) # basic x-y scatterplot
> plot(x1, y1, pch=1:25) # type of point
> ?points # options for different symbols
> plot(x1, y1, pch=1:25,
+ ylab="Y axis label", xlab="X axis label",
+ main="Title of plot")
>
> # adding color, text, and lines
> plot(x1, y1,
+ asp=2, # aspect ratio
+ col=rainbow(100, s=0.8, v=0.8),
+ cex=0.5) # point size
> abline(a=0, b=1) # line y=x
> text(2.5, 1, "Some text.") # text in the plot
> mtext("Some margin text", padj=-1) # text in the margin
> axis(4, -1:1, col="red", col.axis="blue") # right axis
>
> # plotting lines (type="l")
> # points (p), lines (l), both (b)
> plot(y2, x2, type="l",
+ col='red2',
+ xlab="", ylab="",
+ xaxt="n", yaxt="n", # no axes
+ bty="n", # no boundary box
+ main="Happy Valentine's Day!")
>
> # specifying the axes' ranges
> plot(x1 + 1, y1, type="l",
+ col="gray",
+ xlim=range(x1, x2), ylim=range(y1, y2),
+ xlab="X position", ylab="Y position")
> # add to an existing plot with points() (type="l" draws a line)
> points(x2, y2, type="l",
+ lty=2, # line type
+ lwd=2, # line width
+ col="darkgreen")
> arrows(-1, 0, 1, 0) # add arrows
> legend("topright", # placement (or x, y position)
+ c("equation #1", "equation #2"),
+ col=c("gray", "darkgreen"),
+ lty=1:2, lwd=1:2, bty="n")
> # see a list of all built-in datasets:
> data(package = .packages(all.available = TRUE))
> # iris is a famous dataset available in R
> data(iris)
> typeof(iris) # iris is a data.frame, which is stored as a list
[1] "list"
> ?iris
> head(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width
1 5.1 3.5 1.4 0.2
2 4.9 3.0 1.4 0.2
3 4.7 3.2 1.3 0.2
4 4.6 3.1 1.5 0.2
5 5.0 3.6 1.4 0.2
6 5.4 3.9 1.7 0.4
Species
1 setosa
2 setosa
3 setosa
4 setosa
5 setosa
6 setosa
> typeof(iris$Species) # integer
[1] "integer"
>
> # recall that data.frames are type list
> # so they can be accessed by name in two ways:
> head(iris[, "Species"])
[1] setosa setosa setosa setosa setosa setosa
Levels: setosa versicolor virginica
> head(iris$Species)
[1] setosa setosa setosa setosa setosa setosa
Levels: setosa versicolor virginica
> all(iris[, "Species"]==iris$Species)
[1] TRUE
> u <- unique(iris[, "Species"])
> u
[1] setosa versicolor virginica
Levels: setosa versicolor virginica
> as.character(u) # convert factor to character
[1] "setosa" "versicolor" "virginica"
>
> # using the match function to specify colors
> # match(x, table) returns, for each element of x,
> # the index of its first occurrence in table
> s <- sample(10)
> s
[1] 4 10 9 5 3 7 6 8 2 1
> match(1:3, s)
[1] 10 9 5
> m <- match(iris[, "Species"], # each row
+ u) # the three unique species names
> cols <- c("green", "blue", "red") # color vector
> plot(iris[, "Petal.Length"],
+ iris[, "Petal.Width"],
+ col=cols[m],
+ xlab="Petal Length (cm)",
+ ylab="Petal Width (cm)")
> l <- lm(Petal.Width~Petal.Length,
+ data=iris)
> l
Call:
lm(formula = Petal.Width ~ Petal.Length, data = iris)
Coefficients:
(Intercept) Petal.Length
-0.3631 0.4158
> summary(l)
Call:
lm(formula = Petal.Width ~ Petal.Length, data = iris)
Residuals:
Min 1Q Median 3Q Max
-0.56515 -0.12358 -0.01898 0.13288 0.64272
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.363076 0.039762 -9.131 4.7e-16
Petal.Length 0.415755 0.009582 43.387 < 2e-16
(Intercept) ***
Petal.Length ***
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.2065 on 148 degrees of freedom
Multiple R-squared: 0.9271, Adjusted R-squared: 0.9266
F-statistic: 1882 on 1 and 148 DF, p-value: < 2.2e-16
> summary(l)$r.squared
[1] 0.9271098
> coef(l)
(Intercept) Petal.Length
-0.3630755 0.4157554
> abline(l)
>
> # LOcally Weighted RegrESSion (LOWESS)
> low <- lowess(iris[, "Petal.Length"], iris[, "Petal.Width"])
> str(low)
List of 2
$ x: num [1:150] 1 1.1 1.2 1.2 1.3 1.3 1.3 1.3 1.3 1.3 ...
$ y: num [1:150] 0.0689 0.1075 0.1462 0.1462 0.185 ...
> lines(low, col="orange")
> # a more complicated legend with points and lines
> legend("topleft", # placement (or x, y position)
+ legend=c(as.character(u), "Regression", "LOWESS"),
+ pch=c(rep(1, 3), rep(NA, 2)), # point type
+ lty=c(rep(NA, 3), rep(1, 2)), # line type
+ col=c(cols, "black", "orange")) # colors
>
> # scatterplot matrix
> pairs(~Petal.Length+Petal.Width+Sepal.Length+Sepal.Width,
+ data=iris,
+ col=cols[m])
> # set the correct path below
> r <- read.table("<<PATH TO Stocks2014.txt>>",
+ sep="\t", # tab delimited
+ header=TRUE, # first line is column names
+ stringsAsFactors=FALSE)
> head(r)
Date Open High Low Close Volume
1 30-Dec-14 113.64 113.92 112.11 112.52 29881477
2 29-Dec-14 113.79 114.77 113.70 113.91 27598920
3 26-Dec-14 112.10 114.52 112.01 113.99 33720951
4 24-Dec-14 112.58 112.71 112.01 112.01 14479611
5 23-Dec-14 113.23 113.33 112.46 112.54 26028419
6 22-Dec-14 112.16 113.49 111.97 112.94 45167549
Ticker
1 AAPL
2 AAPL
3 AAPL
4 AAPL
5 AAPL
6 AAPL
> typeof(r)
[1] "list"
> class(r)
[1] "data.frame"
> t <- unique(r$Ticker)
> d <- unique(r$Date)
> s <- rev(seq_along(d)) # reverse order
> Day <- s[match(r$Date, d)]
> r <- cbind(r, TradingDay=Day)
> head(r)
Date Open High Low Close Volume
1 30-Dec-14 113.64 113.92 112.11 112.52 29881477
2 29-Dec-14 113.79 114.77 113.70 113.91 27598920
3 26-Dec-14 112.10 114.52 112.01 113.99 33720951
4 24-Dec-14 112.58 112.71 112.01 112.01 14479611
5 23-Dec-14 113.23 113.33 112.46 112.54 26028419
6 22-Dec-14 112.16 113.49 111.97 112.94 45167549
Ticker TradingDay
1 AAPL 251
2 AAPL 250
3 AAPL 249
4 AAPL 248
5 AAPL 247
6 AAPL 246
> plot(Close~TradingDay,
+ data=r,
+ col=match(r$Ticker, t),
+ cex=0.3, log="y")
> legend(x=-10, y=max(r$Close)*1.5,
+ legend=t, col=seq_along(t),
+ ncol=length(t), # number of columns
+ pch=1, cex=0.8, bty="n",
+ xpd=TRUE) # do not crop outside plot