This function translates a DNAStringSet into an AAStringSet.

cds2aa(
  cds,
  shorten = FALSE,
  frame = 1,
  framelist = NULL,
  genetic.code = NULL,
  return.cds = FALSE
)

Arguments

cds

DNAStringSet [mandatory]

shorten

shorten all sequences to a multiple of three [default: FALSE]

frame

indicates the first base of the first codon [default: 1]

framelist

supply vector of frames for each entry [default: NULL]

genetic.code

The genetic code to use for the translation of codons into Amino Acid letters [default: NULL]

return.cds

return shortened cds instead of aa [default: FALSE]

Value

AAStringSet (or the shortened cds if return.cds = TRUE)

Author

Kristian K Ullrich

Examples

## define two cds sequences
cds1 <- Biostrings::DNAString("ATGCAACATTGC")
cds2 <- Biostrings::DNAString("ATG---CATTGC")
cds1.cds2.aln <- c(Biostrings::DNAStringSet(cds1),
    Biostrings::DNAStringSet(cds2))
#cds2aa(cds1.cds2.aln)
cds1.cds2.aln |> cds2aa()
#> AAStringSet object of length 2:
#>     width seq
#> [1]     4 MQHC
#> [2]     4 MXHC
## alternative genetic code
data(woodmouse, package="ape")
#cds2aa(dnabin2dnastring(woodmouse), shorten=TRUE)
woodmouse |> dnabin2dnastring() |> cds2aa(shorten=TRUE)
#> AAStringSet object of length 15:
#>      width seq                                              names               
#>  [1]   321 XRKTHPLLKXISHSFIDLPAPSN...LLPFLHTSKQRSLIFRPITQTL No305
#>  [2]   321 IRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLIFRPITQTL No304
#>  [3]   321 IRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLIFRPITQTL No306
#>  [4]   321 IRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLIFRPITQIX No0906S
#>  [5]   321 IRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLIFRPITQTX No0908S
#>  ...   ... ...
#> [11]   321 IRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLIFRPITQTX No1007S
#> [12]   321 XXXXXXXXXXXXXXXIDLPAPSN...LLPFLHTSKQRSLIFRPITQTX No1114S
#> [13]   321 IRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLIFRPITQTX No1202S
#> [14]   321 IRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLIFRPITQTX No1206S
#> [15]   321 XRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLIFRPITQTX No1208S
#cds2aa(dnabin2dnastring(woodmouse), shorten=TRUE,
#genetic.code=Biostrings::getGeneticCode("2"))
woodmouse |> dnabin2dnastring() |> cds2aa(shorten=TRUE,
genetic.code=Biostrings::getGeneticCode("2"))
#> AAStringSet object of length 15:
#>      width seq                                              names               
#>  [1]   321 XRKTHPLLKXISHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTL No305
#>  [2]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTL No304
#>  [3]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTL No306
#>  [4]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQMX No0906S
#>  [5]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No0908S
#>  ...   ... ...
#> [11]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No1007S
#> [12]   321 XXXXXXXXXXXXXXXIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No1114S
#> [13]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No1202S
#> [14]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No1206S
#> [15]   321 XRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No1208S
woodmouse |> dnabin2dnastring() |> cds2aa(shorten=TRUE, return.cds=TRUE) |>
cds2aa(genetic.code=Biostrings::getGeneticCode("2"))
#> AAStringSet object of length 15:
#>      width seq                                              names               
#>  [1]   321 XRKTHPLLKXISHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTL No305
#>  [2]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTL No304
#>  [3]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTL No306
#>  [4]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQMX No0906S
#>  [5]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No0908S
#>  ...   ... ...
#> [11]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No1007S
#> [12]   321 XXXXXXXXXXXXXXXIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No1114S
#> [13]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No1202S
#> [14]   321 MRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No1206S
#> [15]   321 XRKTHPLLKIINHSFIDLPAPSN...LLPFLHTSKQRSLMFRPITQTX No1208S