Gallery - Example 1
Below is the R code to reproduce the example:library(DECIPHER)
> library(RSQLite)
>
> # load sequences into a database
> fas <- system.file("extdata",
+ "50S_ribosomal_protein_L2.fas",
+ package="DECIPHER")
> dbConn <- dbConnect(SQLite(),
+ ":memory:")
> Seqs2DB(fas,
+ type="FASTA",
+ dbFile=dbConn,
+ identifier="")
Reading FASTA file from line 1 to 1e+05
317 total sequences in table Seqs.
Time difference of 0.15 secs
>
> # identify the sequences based on genus
> x <- dbGetQuery(dbConn,
+ "select description from Seqs")$description
> ns <- unlist(lapply(strsplit(x,
+ split=" "),
+ FUN=`[`,
+ 1L))
> Add2DB(myData=data.frame(identifier=ns),
+ dbFile=dbConn)
Expression:
update or replace Seqs set identifier = :identifier where row_names =
:row_names
Added to table Seqs: "identifier".
Time difference of 0.01 secs
>
> # align the translated sequences
> dna <- SearchDB(dbConn,
+ nameBy="identifier")
Search Expression:
select identifier, _Seqs.sequence from Seqs join _Seqs on
Seqs.row_names = _Seqs.row_names where _Seqs.row_names
in (select row_names from Seqs)
DNAStringSet of length: 317
Time difference of 0.03 secs
> AA <- AlignTranslation(dna,
+ type="AAStringSet")
Determining distance matrix based on shared 4-mers:
|============================================| 100%
Time difference of 2.02 secs
Clustering into groups by similarity:
|============================================| 100%
Time difference of 0.49 secs
Aligning Sequences:
|============================================| 100%
Time difference of 5.2 secs
Determining distance matrix based on alignment:
|============================================| 100%
Time difference of 0.36 secs
Reclustering into groups by similarity:
|============================================| 100%
Time difference of 0.44 secs
Realigning Sequences:
|============================================| 100%
Time difference of 5.75 secs
Refining the alignment:
|============================================| 100%
Time difference of 0.01 secs
> Seqs2DB(AA,
+ type="AAStringSet",
+ dbFile=dbConn,
+ identifier="",
+ tblName="Aligned")
Adding 317 sequences to the database.
317 total sequences in table Aligned.
Time difference of 0.28 secs
> Add2DB(myData=data.frame(identifier=ns),
+ dbFile=dbConn,
+ tblName="Aligned")
Expression:
update or replace Aligned set identifier = :identifier where
row_names = :row_names
Added to table Aligned: "identifier".
Time difference of 0.01 secs
>
> # form a consensus sequence for each genus
> cons <- IdConsensus(dbConn,
+ tblName="Aligned",
+ type="AAStringSet",
+ threshold=0.5,
+ minInformation=0.5)
|============================================| 100%
Found consensus for 70 groups.
Time difference of 1.08 secs
> Seqs2DB(cons,
+ type="DNAStringSet",
+ dbFile=dbConn,
+ identifier="",
+ tblName="Consensus")
Adding 70 sequences to the database.
70 total sequences in table Consensus.
Time difference of 0.05 secs
> aa <- SearchDB(dbConn,
+ "Consensus",
+ type="AAStringSet",
+ nameBy="description",
+ limit="40,20",
+ removeGaps="common")
Search Expression:
select description, _Consensus.sequence from
Consensus join _Consensus on Consensus.row_names =
_Consensus.row_names where _Consensus.row_names in
(select row_names from Consensus) limit 40,20
AAStringSet of length: 20
Time difference of 0.01 secs
>
> # view the sequences in a browser
> BrowseSeqs(aa,
+ threshold=0.5,
+ minInformation=0.5,
+ colWidth=60)
> dbDisconnect(dbConn)
[1] TRUE