Gallery - Example 3
Below is the R code to reproduce the example:library(DECIPHER)
> library(RSQLite)
>
> # load sequences into a database
> fas <- system.file("extdata",
+ "Streptomyces_ITS_aligned.fas",
+ package="DECIPHER")
> dbConn <- dbConnect(SQLite(),
+ ":memory:")
> Seqs2DB(fas,
+ type="FASTA",
+ dbFile=dbConn,
+ "")
Reading FASTA file chunk 1
88 total sequences in table Seqs.
Time difference of 0.03 secs
>
> # identify the sequences by their species
> x <- dbGetQuery(dbConn,
+ "select description from Seqs")$description
> x <- unlist(lapply(strsplit(x,
+ "Streptomyces "),
+ `[`,
+ 2L))
> x <- unlist(lapply(strsplit(x,
+ " "),
+ function(x) {
+ if (x[1]=="sp.")
+ return(x[2])
+ x[1]
+ }))
> x <- ifelse(x=="sp_AA4",
+ "S. AA4",
+ paste("S.",
+ x))
> Add2DB(myData=data.frame(identifier=x),
+ dbConn)
Expression:
update or replace Seqs set identifier = :identifier where row_names =
:row_names
Added to table Seqs: "identifier".
Time difference of 0 secs
>
> # form a consensus for each species
> cons <- IdConsensus(dbConn,
+ threshold=0.3,
+ minInformation=0.1)
|============================================| 100%
Found consensus for 19 groups.
Time difference of 0.26 secs
>
> # calculate a maximum likelihood tree
> dend <- Treeline(cons,
+ method="ML",
+ type="dendrogram")
Constructing initial neighbor-joining tree:
|============================================| 100%
Fitting initial tree to models:
JC69 -ln(L) = 4784, AICc = 9643, BIC = 9790
K80 -ln(L) = 4734, AICc = 9544, BIC = 9695
F81 -ln(L) = 4768, AICc = 9617, BIC = 9777
HKY85 -ln(L) = 4711, AICc = 9507, BIC = 9670
T92 -ln(L) = 4719, AICc = 9518, BIC = 9673
TN93 -ln(L) = 4699, AICc = 9484, BIC = 9651
SYM -ln(L) = 4709, AICc = 9505, BIC = 9672
GTR -ln(L) = 4685, AICc = 9462, BIC = 9642
The selected model was: GTR
Optimizing up to 400 candidate trees:
Tree #177. -ln(L) = 4658.481 (0.000%), 3 Climbs, 0 Grafts of 0
Finalizing the best tree (#167):
-ln(L) = 4658.481 (0.000%), 0 Climbs
Model parameters:
Frequency(A) = 0.208
Frequency(C) = 0.255
Frequency(G) = 0.347
Frequency(T) = 0.190
Rate A <-> C = 0.438
Rate A <-> G = 1.191
Rate A <-> T = 0.915
Rate C <-> G = 0.545
Rate C <-> T = 2.228
Rate G <-> T = 1.000
Time difference of 11.3 secs
> dend <- dendrapply(dend,
+ FUN=function(n) {
+ if(is.leaf(n))
+ attr(n, "label") <-
+ as.expression(substitute(italic(leaf),
+ list(leaf=attr(n, "label"))))
+ n
+ })
>
> # display the phylogenetic tree
> p <- par(mar=c(1, 1, 1, 10),
+ xpd=TRUE)
> plot(dend,
+ yaxt="n",
+ horiz=TRUE)
> arrows(-0.2, 2, 0, 2,
+ angle=90,
+ length=0.05,
+ code=3)
> text(-0.1, 2,
+ "0.2 subs./site",
+ pos=3))
> par(p)
>
> dbDisconnect(dbConn)
[1] TRUE