Gallery - Example 3
Below is the R code to reproduce the example:library(DECIPHER)
>
> # load sequences into a database
> fas <- system.file("extdata",
+ "Streptomyces_ITS_aligned.fas",
+ package="DECIPHER")
> dbConn <- dbConnect(SQLite(),
+ ":memory:")
> Seqs2DB(fas,
+ type="FASTA",
+ dbFile=dbConn,
+ "")
Reading FASTA file chunk 1
88 total sequences in table Seqs.
Time difference of 0.03 secs
>
> # identify the sequences by their species
> x <- dbGetQuery(dbConn,
+ "select description from Seqs")$description
> x <- unlist(lapply(strsplit(x,
+ "Streptomyces "),
+ `[`,
+ 2L))
> x <- unlist(lapply(strsplit(x,
+ " "),
+ function(x) {
+ if (x[1]=="sp.")
+ return(x[2])
+ x[1]
+ }))
> x <- ifelse(x=="sp_AA4",
+ "S. AA4",
+ paste("S.",
+ x))
> Add2DB(myData=data.frame(identifier=x),
+ dbConn)
Expression:
update or replace Seqs set identifier = :identifier where row_names =
:row_names
Added to table Seqs: "identifier".
Time difference of 0 secs
>
> # form a consensus for each species
> cons <- IdConsensus(dbConn,
+ threshold=0.3,
+ minInformation=0.1)
|============================================| 100%
Found consensus for 19 groups.
Time difference of 0.26 secs
>
> # calculate a maximum likelihood tree
> d <- DistanceMatrix(cons,
+ correction="Jukes-Cantor")
|============================================| 100%
Time difference of 0.06 secs
> dend <- IdClusters(d,
+ method="ML",
+ type="dendrogram",
+ myXStringSet=cons)
Constructing initial neighbor-joining tree:
|============================================| 100%
JC69: -ln(L)=4967, AICc=10014, BIC=10133
JC69+G4: -ln(L)=4522, AICc=9126, BIC=9248
K80: -ln(L)=4886, AICc=9855, BIC=9977
K80+G4: -ln(L)=4443, AICc=8970, BIC=9095
F81: -ln(L)=4872, AICc=9833, BIC=9960
F81+G4: -ln(L)=4400, AICc=8890, BIC=9021
HKY85: -ln(L)=4780, AICc=9650, BIC=9781
HKY85+G4: -ln(L)=4310, AICc=8714, BIC=8848
T92: -ln(L)=4817, AICc=9718, BIC=9843
T92+G4: -ln(L)=4359, AICc=8805, BIC=8933
TN93: -ln(L)=4769, AICc=9631, BIC=9765
TN93+G4: -ln(L)=4307, AICc=8711, BIC=8847
The selected model was: TN93+G4
Maximizing Likelihood of Tree:
-ln(Likelihood) = 4254 (1.23% improvement), 6 NNIs
Model parameters:
Frequency(A) = 0.193
Frequency(C) = 0.244
Frequency(G) = 0.355
Frequency(T) = 0.208
Rate A <-> G = 1.445
Rate C <-> T = 2.478
Transversion rates = 1
Alpha = 0.291
Time difference of 1.56 secs
> dend <- dendrapply(dend,
+ FUN=function(n) {
+ if(is.leaf(n))
+ attr(n, "label") <-
+ as.expression(substitute(italic(leaf),
+ list(leaf=attr(n, "label"))))
+ n
+ })
>
> # display the phylogenetic tree
> p <- par(mar=c(1, 1, 1, 10),
+ xpd=TRUE)
> plot(dend,
+ yaxt="n",
+ horiz=TRUE)
> arrows(-0.1, 6, -0.2, 6,
+ angle=90,
+ length=0.05,
+ code=3)
> text(-0.15, 6,
+ "0.1",
+ adj=c(0.5, -0.5))
> par(p)
>
> dbDisconnect(dbConn)
[1] TRUE