R Lesson #13 - Fitting parameters
This lesson showcases the optim function, which can be used to find the input parameter values that minimize the output of another function. The optim function works by adaptively changing the input parameters until it has optimized the objective function.X <- 1:6
> Y <- c(12, 1, 44, 17, 95, 11) # randomly selected values
> plot(X, Y)
>
> fitMe <- function(params) {
+ b <- params[1]
+ m <- params[2]
+
+ points(X, m*X + b, type="l")
+ Sys.sleep(0.1) # slow it down to watch progress
+
+ error <- sum((Y - (m*X + b))^2)
+ print(error)
+ return(error)
+ }
>
> o <- optim(c(b=0, m=10), fitMe)
[1] 5516
[1] 5582
[1] 5917
[1] 5321
[1] 5237.5
[1] 5249.5
[1] 5256.219
[1] 5540
[1] 5307.5
[1] 5319.5
[1] 5256.875
[1] 5262.875
[1] 5244.969
[1] 5247.969
[1] 5242.451
[1] 5234.217
[1] 5230.281
[1] 5229.285
[1] 5230.532
[1] 5224.18
[1] 5223.279
[1] 5226.742
[1] 5224.447
[1] 5229.278
[1] 5224.903
[1] 5224.906
[1] 5223.81
[1] 5225.205
[1] 5223.613
[1] 5223.197
[1] 5223.466
[1] 5223.421
[1] 5223.201
[1] 5223.334
[1] 5223.171
[1] 5223.287
[1] 5223.151
[1] 5223.355
[1] 5223.147
[1] 5223.164
[1] 5223.147
[1] 5223.181
[1] 5223.143
> o$par # optimal parameters
b m
5.002281 7.142624
>
> coef(lm(Y~X)) # same as o$par
(Intercept) X
5.000000 7.142857
it <- 0 # global variable
> cols <- rainbow(100)
>
> f <- function(x, plot=TRUE) {
+ it <<- it + 1L # globally iterate
+
+ # calculate the sum of three functions
+ y1 <- -50*(x - 2)^2 + 1
+ y2 <- abs(x) - 1
+ y3 <- 2*x^4 - 20*x^3
+ Y <- y1 + y2 + y3
+
+ if (plot && it <= 100)
+ points(x, Y, col=cols[it])
+
+ return(Y)
+ }
>
> t <- seq(-10, 15, 0.01)
> plot(t, f(t, plot=FALSE), type="l")
>
> o <- optim(0, # initial parameter value
+ f, # function to find minimum value
+ method="SANN") # optimization method
>
> print(it) # number of iterations
[1] 10001
> points(o$par, f(o$par))
load("<<PATH TO Madison_Weather.RData>>")
>
> time <- Weather$Year - 1900 +
+ Weather$Day/365 +
+ Weather$Hour/(365*24) +
+ Weather$Min/(365*24*60)
> Weather <- cbind(Weather, Time=time)
>
> max_points <- 1e5
> plot(Weather$Time[seq_len(max_points)],
+ Weather$Temp[seq_len(max_points)],
+ type="l",
+ ylab="Temperature (degrees Fahrenheit)",
+ xlab="Time")
# sum of squared errors (SSE)
> SSE <- function(estimated, actual) {
+ sum((estimated - actual)^2,
+ na.rm=TRUE)
+ }
>
> iteration <- 0L # used as a global variable
> cols <- rep(rainbow(100), 100) # colors to plot
> plot_sine <- function(params, ...) {
+ amp <- params[1]
+ phase <- params[2]
+ freq <- params[3]
+ off <- params[4]
+
+ x <- Weather$Time[seq_len(max_points)]
+ y <- amp*sin(freq*(x - phase)) + off
+ points(x,
+ y,
+ type="l",
+ ...)
+ }
>
> # function being passed to optim:
> goodFit <- function(params, plot=FALSE) {
+ iteration <<- iteration + 1L
+
+ amp <- params[1]
+ phase <- params[2]
+ freq <- params[3]
+ off <- params[4]
+
+ sine <- amp*sin(freq*(Weather$Time - phase)) + off
+
+ error <- SSE(sine,
+ Weather$Temp)
+
+ if (plot)
+ plot_sine(params,
+ lwd=0.5,
+ col=cols[iteration])
+
+ return(error)
+ }
>
> o <- optim(c(1, 1, 1, 1), # params
+ goodFit, # function
+ plot=TRUE) # function arguments
>
> print(iteration)
[1] 79
> o # the output is a list
$par
[1] -25.48035 -27.69295 16.78735 38.93353
$value
[1] 510106022
$counts
function gradient
79 NA
$convergence
[1] 10
$message
NULL
>
> plot_sine(o$par,
+ col="darkred", lwd=3)
# re-initialize the plot
> plot(Weather$Time[seq_len(max_points)],
+ Weather$Temp[seq_len(max_points)],
+ type="l",
+ ylab="Temperature (degrees Fahrenheit)",
+ xlab="Time")
>
> iteration <- 0L # used as a global variable
> o <- optim(c(amp=20, # +/- 20 from mean temp
+ phase=0.3, # guess and check
+ freq=2*pi, # one cycle per year
+ off=50), # mean temp in Madison
+ goodFit,
+ plot=TRUE)
>
> print(iteration)
[1] 419
> o
$par
amp phase freq off
26.2572068 0.3034806 6.2834491 46.9456483
$value
[1] 74467278
$counts
function gradient
419 NA
$convergence
[1] 0
$message
NULL
>
> plot_sine(o$par,
+ col="darkred", lwd=3)
# note that the final '/' is required:
> path <- "<<PATH TO ../single_colony/>>"
> images <- list.files(path) # list files
> head(images)
[1] "Test5S01_001_2.jpg" "Test5S01_002_2.jpg"
[3] "Test5S01_003_2.jpg" "Test5S01_004_2.jpg"
[5] "Test5S01_005_2.jpg" "Test5S01_006_2.jpg"
>
> #install.packages("jpeg") # only need to run this once
> library(jpeg)
> intensity <- numeric(length(images))
> # loop though the images in the directory:
> for (i in seq_along(images)) {
+ img <- readJPEG(paste0(path,
+ images[i]))
+ intensity[i] <- mean(img)
+ }
>
> x <- (0:(length(intensity) - 1))*4
> plot(x,
+ intensity,
+ ylab="Average Intensity",
+ xlab="Hours of Growth")
# parameters determined through guess-and-check
> offset <- min(intensity)
> amp <- diff(range(intensity))
> slope <- 5
> shift <- 4*which.min(abs(intensity - mean(intensity)))
>
> # the sigmoidal function of four parameters
> y <- amp/(1 + exp(-slope*(log(x) - log(shift)))) + offset
>
> points(x, y, type="l") # plot the initial curve
>
> # optimize the parameters
> f <- function(params, plot=FALSE) {
+ offset <- params[1]
+ amp <- params[2]
+ slope <- params[3]
+ shift <- params[4]
+
+ y <- amp/(1 + exp(-slope*(log(x) - log(shift)))) + offset
+
+ error <- sum((intensity - y)^2) # SSE
+
+ if (plot)
+ points(x, y, type="l", col="red")
+
+ invisible(error)
+ }
>
> # simultaneously optimize the parameters
> o <- optim(c(offset,
+ amp,
+ slope,
+ shift),
+ f)
> # display the parameters
> cat("offset =", o$par[1],
+ "\namp =", o$par[2],
+ "\nslope =", o$par[3],
+ "\nshift =", o$par[4])
offset = 0.1183825
amp = 0.2141441
slope = 3.971287
shift = 120.7618
>
> # plot the optimized sigmoidal
> f(o$par, plot=TRUE)
> legend("topleft",
+ c("Initial", "Optimal"),
+ lty=1,
+ col=c("black", "red"))