This function adds population information to a
DNAStringSet
or an AAStringSet
and stores it in the object's
metadata
slot.
__Note__: All unassigned sequences will be put into pop "unassigned"!
Do not use "unassigned" as a population name!
__Note__: Names in a population in the poplist must match sequence names!
__Note__: Duplicated assignments are allowed!
addpop2string(seq, poplist)
seq: DNAStringSet
or AAStringSet
[mandatory]
poplist: named list
of populations, each given either as indices or as names
(do not mix indices and names within one population) [mandatory]
An object of class DNAStringSet
or AAStringSet
## load example sequence data
data(iupac, package="MSA2dist")
iupac.aa <- iupac |> cds2aa(shorten = TRUE)
## create poplist
poplist <- list(FRA = grep("Mmd.FRA", names(iupac)),
GER = grep("Mmd.GER", names(iupac)),
IRA = grep("Mmd.IRA", names(iupac)),
AFG = grep("Mmm.AFG", names(iupac)))
iupac.aa <- iupac.aa |> addpop2string(poplist)
#(iupac.aa |> slot("metadata"))$pop.integer
iupac.aa |> popinteger()
#> $FRA
#> [1] 1 2 3 4 5 6 7 8
#>
#> $GER
#> [1] 9 10 11 12 13 14 15 16
#>
#> $IRA
#> [1] 17 18 19 20 21 22 23 24
#>
#> $AFG
#> [1] 25 26 27 28 29 30
#>
#(iupac.aa |> slot("metadata"))$pop.names
iupac.aa |> popnames()
#> $FRA
#> [1] "Mmd.FRA.14" "Mmd.FRA.15B" "Mmd.FRA.16B" "Mmd.FRA.18B" "Mmd.FRA.B2C"
#> [6] "Mmd.FRA.C1" "Mmd.FRA.E1" "Mmd.FRA.F1B"
#>
#> $GER
#> [1] "Mmd.GER.TP1" "Mmd.GER.TP121B" "Mmd.GER.TP17-2"
#> [4] "Mmd.GER.TP3-02" "Mmd.GER.TP4a" "Mmd.GER.TP51D"
#> [7] "Mmd.GER.TP7-10F1A2" "Mmd.GER.TP81B"
#>
#> $IRA
#> [1] "Mmd.IRA.AH15" "Mmd.IRA.AH23" "Mmd.IRA.JR11" "Mmd.IRA.JR15"
#> [5] "Mmd.IRA.JR2-F1C" "Mmd.IRA.JR5-F1C" "Mmd.IRA.JR7-F1C" "Mmd.IRA.JR8-F1A"
#>
#> $AFG
#> [1] "Mmm.AFG.396" "Mmm.AFG.413" "Mmm.AFG.416" "Mmm.AFG.424" "Mmm.AFG.435"
#> [6] "Mmm.AFG.444"
#>
## mixing index and names across populations (never within one population)
poplist <- list(FRA = names(iupac)[grep("Mmd.FRA", names(iupac))],
GER = grep("Mmd.GER", names(iupac)),
IRA = names(iupac)[grep("Mmd.IRA", names(iupac))],
AFG = grep("Mmm.AFG", names(iupac)))
iupac.aa <- iupac.aa |> addpop2string(poplist)
iupac.aa |> popinteger()
#> $FRA
#> [1] 1 2 3 4 5 6 7 8
#>
#> $GER
#> [1] 9 10 11 12 13 14 15 16
#>
#> $IRA
#> [1] 17 18 19 20 21 22 23 24
#>
#> $AFG
#> [1] 25 26 27 28 29 30
#>
iupac.aa |> popnames()
#> $FRA
#> [1] "Mmd.FRA.14" "Mmd.FRA.15B" "Mmd.FRA.16B" "Mmd.FRA.18B" "Mmd.FRA.B2C"
#> [6] "Mmd.FRA.C1" "Mmd.FRA.E1" "Mmd.FRA.F1B"
#>
#> $GER
#> [1] "Mmd.GER.TP1" "Mmd.GER.TP121B" "Mmd.GER.TP17-2"
#> [4] "Mmd.GER.TP3-02" "Mmd.GER.TP4a" "Mmd.GER.TP51D"
#> [7] "Mmd.GER.TP7-10F1A2" "Mmd.GER.TP81B"
#>
#> $IRA
#> [1] "Mmd.IRA.AH15" "Mmd.IRA.AH23" "Mmd.IRA.JR11" "Mmd.IRA.JR15"
#> [5] "Mmd.IRA.JR2-F1C" "Mmd.IRA.JR5-F1C" "Mmd.IRA.JR7-F1C" "Mmd.IRA.JR8-F1A"
#>
#> $AFG
#> [1] "Mmm.AFG.396" "Mmm.AFG.413" "Mmm.AFG.416" "Mmm.AFG.424" "Mmm.AFG.435"
#> [6] "Mmm.AFG.444"
#>
## leaving out some sequences which will be assigned as "unassigned"
poplist <- list(FRA = names(iupac)[grep("Mmd.FRA", names(iupac))],
GER = grep("Mmd.GER", names(iupac)),
IRA = names(iupac)[grep("Mmd.IRA", names(iupac))])
iupac.aa <- iupac.aa |> addpop2string(poplist)
iupac.aa |> popinteger()
#> $FRA
#> [1] 1 2 3 4 5 6 7 8
#>
#> $GER
#> [1] 9 10 11 12 13 14 15 16
#>
#> $IRA
#> [1] 17 18 19 20 21 22 23 24
#>
#> $unassigned
#> [1] 25 26 27 28 29 30
#>
iupac.aa |> popnames()
#> $FRA
#> [1] "Mmd.FRA.14" "Mmd.FRA.15B" "Mmd.FRA.16B" "Mmd.FRA.18B" "Mmd.FRA.B2C"
#> [6] "Mmd.FRA.C1" "Mmd.FRA.E1" "Mmd.FRA.F1B"
#>
#> $GER
#> [1] "Mmd.GER.TP1" "Mmd.GER.TP121B" "Mmd.GER.TP17-2"
#> [4] "Mmd.GER.TP3-02" "Mmd.GER.TP4a" "Mmd.GER.TP51D"
#> [7] "Mmd.GER.TP7-10F1A2" "Mmd.GER.TP81B"
#>
#> $IRA
#> [1] "Mmd.IRA.AH15" "Mmd.IRA.AH23" "Mmd.IRA.JR11" "Mmd.IRA.JR15"
#> [5] "Mmd.IRA.JR2-F1C" "Mmd.IRA.JR5-F1C" "Mmd.IRA.JR7-F1C" "Mmd.IRA.JR8-F1A"
#>
#> $unassigned
#> [1] "Mmm.AFG.396" "Mmm.AFG.413" "Mmm.AFG.416" "Mmm.AFG.424" "Mmm.AFG.435"
#> [6] "Mmm.AFG.444"
#>