This function calculates pairwise distances for all combinations
of an AAStringSet.
aastring2dist(
aa,
threads = 1,
symmetric = TRUE,
score = NULL,
mask = NULL,
region = NULL
)
AAStringSet
[mandatory]
number of parallel threads [default: 1]
whether the score matrix is symmetric [default: TRUE]
score matrix
use a score matrix to calculate distances
[mandatory]
IRanges
object indicating masked sites
[default: NULL]
IRanges
object indicating the region to use for the distance
calculation (by default all sites are used) [default: NULL]
A list containing a data.frame
of pairwise distance values
(distSTRING),
the sites used (sitesUsed),
and the region used
(regionUsed).
## load example sequence data
data("hiv", package="MSA2dist")
#aastring2dist(cds2aa(hiv), score=granthamMatrix())
hiv |> cds2aa() |> aastring2dist(score=granthamMatrix())
#>
Computing: [========================================] 100% (done)
#> $distSTRING
#> U68496 U68497 U68498 U68499 U68500 U68501 U68502
#> U68496 0.000000 4.43956 11.516484 9.879121 13.098901 17.05495 19.71429
#> U68497 4.439560 0.00000 12.681319 10.406593 13.626374 18.21978 19.35165
#> U68498 11.516484 12.68132 0.000000 8.769231 8.571429 13.25275 16.23077
#> U68499 9.879121 10.40659 8.769231 0.000000 4.780220 15.58242 15.21978
#> U68500 13.098901 13.62637 8.571429 4.780220 0.000000 15.38462 14.35165
#> U68501 17.054945 18.21978 13.252747 15.582418 15.384615 0.00000 16.43956
#> U68502 19.714286 19.35165 16.230769 15.219780 14.351648 16.43956 0.00000
#> U68503 17.824176 19.80220 16.153846 16.582418 14.956044 17.50549 15.98901
#> U68504 13.857143 13.89011 9.516484 10.483516 10.571429 10.75824 11.80220
#> U68505 14.241758 14.27473 10.274725 12.472527 12.560440 13.08791 12.92308
#> U68506 14.923077 14.82418 11.043956 12.010989 12.065934 11.78022 12.87912
#> U68507 18.010989 18.54945 12.153846 14.241758 13.846154 14.91209 14.47253
#> U68508 19.010989 18.90110 15.560440 13.527473 10.890110 13.86813 13.83516
#> U68503 U68504 U68505 U68506 U68507 U68508
#> U68496 17.82418 13.857143 14.241758 14.923077 18.010989 19.01099
#> U68497 19.80220 13.890110 14.274725 14.824176 18.549451 18.90110
#> U68498 16.15385 9.516484 10.274725 11.043956 12.153846 15.56044
#> U68499 16.58242 10.483516 12.472527 12.010989 14.241758 13.52747
#> U68500 14.95604 10.571429 12.560440 12.065934 13.846154 10.89011
#> U68501 17.50549 10.758242 13.087912 11.780220 14.912088 13.86813
#> U68502 15.98901 11.802198 12.923077 12.879121 14.472527 13.83516
#> U68503 0.00000 11.384615 13.494505 12.912088 14.604396 13.69231
#> U68504 11.38462 0.000000 2.439560 1.813187 5.186813 10.27473
#> U68505 13.49451 2.439560 0.000000 4.252747 6.098901 12.24176
#> U68506 12.91209 1.813187 4.252747 0.000000 4.340659 11.80220
#> U68507 14.60440 5.186813 6.098901 4.340659 0.000000 13.49451
#> U68508 13.69231 10.274725 12.241758 11.802198 13.494505 0.00000
#>
#> $sitesUsed
#> U68496 U68497 U68498 U68499 U68500 U68501 U68502 U68503 U68504 U68505
#> U68496 91 91 91 91 91 91 91 91 91 91
#> U68497 91 91 91 91 91 91 91 91 91 91
#> U68498 91 91 91 91 91 91 91 91 91 91
#> U68499 91 91 91 91 91 91 91 91 91 91
#> U68500 91 91 91 91 91 91 91 91 91 91
#> U68501 91 91 91 91 91 91 91 91 91 91
#> U68502 91 91 91 91 91 91 91 91 91 91
#> U68503 91 91 91 91 91 91 91 91 91 91
#> U68504 91 91 91 91 91 91 91 91 91 91
#> U68505 91 91 91 91 91 91 91 91 91 91
#> U68506 91 91 91 91 91 91 91 91 91 91
#> U68507 91 91 91 91 91 91 91 91 91 91
#> U68508 91 91 91 91 91 91 91 91 91 91
#> U68506 U68507 U68508
#> U68496 91 91 91
#> U68497 91 91 91
#> U68498 91 91 91
#> U68499 91 91 91
#> U68500 91 91 91
#> U68501 91 91 91
#> U68502 91 91 91
#> U68503 91 91 91
#> U68504 91 91 91
#> U68505 91 91 91
#> U68506 91 91 91
#> U68507 91 91 91
#> U68508 91 91 91
#>
#> $regionUsed
#> IRanges object with 1 range and 0 metadata columns:
#> start end width
#> <integer> <integer> <integer>
#> [1] 1 91 91
#>
## create mask
mask1 <- IRanges::IRanges(start=c(11,41,71), end=c(20,50,80))
## use mask
hiv |> cds2aa() |> aastring2dist(score=granthamMatrix(), mask=mask1)
#>
Computing: [========================================] 100% (done)
#> $distSTRING
#> U68496 U68497 U68498 U68499 U68500 U68501 U68502
#> U68496 0.000000 4.344262 15.442623 13.000000 17.409836 19.18033 22.37705
#> U68497 4.344262 0.000000 14.901639 11.508197 15.918033 18.63934 21.93443
#> U68498 15.442623 14.901639 0.000000 13.081967 12.393443 14.52459 18.19672
#> U68499 13.000000 11.508197 13.081967 0.000000 6.737705 18.00000 16.68852
#> U68500 17.409836 15.918033 12.393443 6.737705 0.000000 17.31148 15.00000
#> U68501 19.180328 18.639344 14.524590 18.000000 17.311475 0.00000 18.19672
#> U68502 22.377049 21.934426 18.196721 16.688525 15.000000 18.19672 0.00000
#> U68503 21.426230 22.098361 17.196721 17.836066 15.016393 18.90164 18.90164
#> U68504 15.393443 13.901639 9.934426 11.377049 11.114754 11.47541 14.70492
#> U68505 15.967213 14.475410 11.065574 14.344262 14.081967 14.95082 16.37705
#> U68506 16.540984 15.049180 11.770492 13.213115 12.901639 12.55738 15.65574
#> U68507 19.098361 18.557377 11.377049 14.491803 13.508197 15.18033 15.98361
#> U68508 22.737705 20.295082 18.885246 15.852459 11.524590 15.77049 16.73770
#> U68503 U68504 U68505 U68506 U68507 U68508
#> U68496 21.42623 15.393443 15.967213 16.540984 19.098361 22.73770
#> U68497 22.09836 13.901639 14.475410 15.049180 18.557377 20.29508
#> U68498 17.19672 9.934426 11.065574 11.770492 11.377049 18.88525
#> U68499 17.83607 11.377049 14.344262 13.213115 14.491803 15.85246
#> U68500 15.01639 11.114754 14.081967 12.901639 13.508197 11.52459
#> U68501 18.90164 11.475410 14.950820 12.557377 15.180328 15.77049
#> U68502 18.90164 14.704918 16.377049 15.655738 15.983607 16.73770
#> U68503 0.00000 11.688525 14.836066 13.524590 14.000000 14.78689
#> U68504 11.68852 0.000000 3.639344 1.836066 4.819672 14.03279
#> U68505 14.83607 3.639344 0.000000 5.475410 6.180328 16.96721
#> U68506 13.52459 1.836066 5.475410 0.000000 4.426230 15.86885
#> U68507 14.00000 4.819672 6.180328 4.426230 0.000000 16.34426
#> U68508 14.78689 14.032787 16.967213 15.868852 16.344262 0.00000
#>
#> $sitesUsed
#> U68496 U68497 U68498 U68499 U68500 U68501 U68502 U68503 U68504 U68505
#> U68496 61 61 61 61 61 61 61 61 61 61
#> U68497 61 61 61 61 61 61 61 61 61 61
#> U68498 61 61 61 61 61 61 61 61 61 61
#> U68499 61 61 61 61 61 61 61 61 61 61
#> U68500 61 61 61 61 61 61 61 61 61 61
#> U68501 61 61 61 61 61 61 61 61 61 61
#> U68502 61 61 61 61 61 61 61 61 61 61
#> U68503 61 61 61 61 61 61 61 61 61 61
#> U68504 61 61 61 61 61 61 61 61 61 61
#> U68505 61 61 61 61 61 61 61 61 61 61
#> U68506 61 61 61 61 61 61 61 61 61 61
#> U68507 61 61 61 61 61 61 61 61 61 61
#> U68508 61 61 61 61 61 61 61 61 61 61
#> U68506 U68507 U68508
#> U68496 61 61 61
#> U68497 61 61 61
#> U68498 61 61 61
#> U68499 61 61 61
#> U68500 61 61 61
#> U68501 61 61 61
#> U68502 61 61 61
#> U68503 61 61 61
#> U68504 61 61 61
#> U68505 61 61 61
#> U68506 61 61 61
#> U68507 61 61 61
#> U68508 61 61 61
#>
#> $regionUsed
#> IRanges object with 4 ranges and 0 metadata columns:
#> start end width
#> <integer> <integer> <integer>
#> [1] 1 10 10
#> [2] 21 40 20
#> [3] 51 70 20
#> [4] 81 91 11
#>
## use region
region1 <- IRanges::IRanges(start=c(1,75), end=c(45,85))
hiv |> cds2aa() |> aastring2dist(score=granthamMatrix(), region=region1)
#>
Computing: [========================================] 100% (done)
#> $distSTRING
#> U68496 U68497 U68498 U68499 U68500 U68501 U68502
#> U68496 0.000000 5.089286 8.589286 8.875000 11.410714 12.53571 15.91071
#> U68497 5.089286 0.000000 8.357143 7.607143 10.142857 12.30357 14.76786
#> U68498 8.589286 8.357143 0.000000 8.160714 10.750000 12.05357 16.28571
#> U68499 8.875000 7.607143 8.160714 0.000000 3.428571 11.96429 10.78571
#> U68500 11.410714 10.142857 10.750000 3.428571 0.000000 14.55357 12.28571
#> U68501 12.535714 12.303571 12.053571 11.964286 14.553571 0.00000 19.66071
#> U68502 15.910714 14.767857 16.285714 10.785714 12.285714 19.66071 0.00000
#> U68503 13.303571 14.553571 14.785714 10.821429 10.625000 17.19643 15.05357
#> U68504 7.375000 5.303571 6.303571 5.214286 7.803571 10.46429 13.32143
#> U68505 8.964286 6.892857 5.589286 6.446429 9.035714 12.05357 14.55357
#> U68506 8.696429 6.410714 8.375000 7.285714 9.821429 12.53571 14.66071
#> U68507 13.178571 11.928571 9.642857 10.375000 12.178571 16.26786 16.71429
#> U68508 13.107143 11.839286 14.464286 9.250000 9.125000 12.58929 14.96429
#> U68503 U68504 U68505 U68506 U68507 U68508
#> U68496 13.30357 7.375000 8.964286 8.696429 13.178571 13.107143
#> U68497 14.55357 5.303571 6.892857 6.410714 11.928571 11.839286
#> U68498 14.78571 6.303571 5.589286 8.375000 9.642857 14.464286
#> U68499 10.82143 5.214286 6.446429 7.285714 10.375000 9.250000
#> U68500 10.62500 7.803571 9.035714 9.821429 12.178571 9.125000
#> U68501 17.19643 10.464286 12.053571 12.535714 16.267857 12.589286
#> U68502 15.05357 13.321429 14.553571 14.660714 16.714286 14.964286
#> U68503 0.00000 13.089286 14.321429 15.160714 17.375000 11.500000
#> U68504 13.08929 0.000000 1.589286 2.535714 7.303571 9.767857
#> U68505 14.32143 1.589286 0.000000 4.125000 6.589286 11.000000
#> U68506 15.16071 2.535714 4.125000 0.000000 5.517857 11.839286
#> U68507 17.37500 7.303571 6.589286 5.517857 0.000000 14.053571
#> U68508 11.50000 9.767857 11.000000 11.839286 14.053571 0.000000
#>
#> $sitesUsed
#> U68496 U68497 U68498 U68499 U68500 U68501 U68502 U68503 U68504 U68505
#> U68496 56 56 56 56 56 56 56 56 56 56
#> U68497 56 56 56 56 56 56 56 56 56 56
#> U68498 56 56 56 56 56 56 56 56 56 56
#> U68499 56 56 56 56 56 56 56 56 56 56
#> U68500 56 56 56 56 56 56 56 56 56 56
#> U68501 56 56 56 56 56 56 56 56 56 56
#> U68502 56 56 56 56 56 56 56 56 56 56
#> U68503 56 56 56 56 56 56 56 56 56 56
#> U68504 56 56 56 56 56 56 56 56 56 56
#> U68505 56 56 56 56 56 56 56 56 56 56
#> U68506 56 56 56 56 56 56 56 56 56 56
#> U68507 56 56 56 56 56 56 56 56 56 56
#> U68508 56 56 56 56 56 56 56 56 56 56
#> U68506 U68507 U68508
#> U68496 56 56 56
#> U68497 56 56 56
#> U68498 56 56 56
#> U68499 56 56 56
#> U68500 56 56 56
#> U68501 56 56 56
#> U68502 56 56 56
#> U68503 56 56 56
#> U68504 56 56 56
#> U68505 56 56 56
#> U68506 56 56 56
#> U68507 56 56 56
#> U68508 56 56 56
#>
#> $regionUsed
#> IRanges object with 2 ranges and 0 metadata columns:
#> start end width
#> <integer> <integer> <integer>
#> [1] 1 45 45
#> [2] 75 85 11
#>
## use mask and region
hiv |> cds2aa() |> aastring2dist(score=granthamMatrix(),
mask=mask1, region=region1)
#>
Computing: [========================================] 100% (done)
#> $distSTRING
#> U68496 U68497 U68498 U68499 U68500 U68501 U68502
#> U68496 0.000000 5.085714 13.000000 13.457143 16.82857 18.54286 20.08571
#> U68497 5.085714 0.000000 9.571429 8.371429 11.74286 15.11429 19.34286
#> U68498 13.000000 9.571429 0.000000 13.057143 16.51429 18.51429 21.42857
#> U68499 13.457143 8.371429 13.057143 0.000000 4.80000 18.37143 12.62857
#> U68500 16.828571 11.742857 16.514286 4.800000 0.00000 21.82857 14.34286
#> U68501 18.542857 15.114286 18.514286 18.371429 21.82857 0.00000 26.05714
#> U68502 20.085714 19.342857 21.428571 12.628571 14.34286 26.05714 0.00000
#> U68503 16.771429 15.714286 18.400000 12.057143 11.05714 21.48571 17.85714
#> U68504 9.485714 4.400000 8.514286 6.771429 10.22857 14.40000 16.25714
#> U68505 12.028571 6.942857 7.371429 8.742857 12.20000 16.94286 18.22857
#> U68506 10.828571 5.742857 11.057143 9.314286 12.68571 16.94286 17.25714
#> U68507 14.428571 11.000000 9.514286 10.685714 12.88571 19.34286 16.97143
#> U68508 19.485714 14.400000 22.400000 14.057143 13.17143 18.62857 18.57143
#> U68503 U68504 U68505 U68506 U68507 U68508
#> U68496 16.77143 9.485714 12.028571 10.828571 14.428571 19.48571
#> U68497 15.71429 4.400000 6.942857 5.742857 11.000000 14.40000
#> U68498 18.40000 8.514286 7.371429 11.057143 9.514286 22.40000
#> U68499 12.05714 6.771429 8.742857 9.314286 10.685714 14.05714
#> U68500 11.05714 10.228571 12.200000 12.685714 12.885714 13.17143
#> U68501 21.48571 14.400000 16.942857 16.942857 19.342857 18.62857
#> U68502 17.85714 16.257143 18.228571 17.257143 16.971429 18.57143
#> U68503 0.00000 14.114286 16.085714 16.657143 16.628571 12.40000
#> U68504 14.11429 0.000000 2.542857 2.542857 6.600000 14.80000
#> U68505 16.08571 2.542857 0.000000 5.085714 5.457143 16.77143
#> U68506 16.65714 2.542857 5.085714 0.000000 5.257143 17.34286
#> U68507 16.62857 6.600000 5.457143 5.257143 0.000000 17.31429
#> U68508 12.40000 14.800000 16.771429 17.342857 17.314286 0.00000
#>
#> $sitesUsed
#> U68496 U68497 U68498 U68499 U68500 U68501 U68502 U68503 U68504 U68505
#> U68496 35 35 35 35 35 35 35 35 35 35
#> U68497 35 35 35 35 35 35 35 35 35 35
#> U68498 35 35 35 35 35 35 35 35 35 35
#> U68499 35 35 35 35 35 35 35 35 35 35
#> U68500 35 35 35 35 35 35 35 35 35 35
#> U68501 35 35 35 35 35 35 35 35 35 35
#> U68502 35 35 35 35 35 35 35 35 35 35
#> U68503 35 35 35 35 35 35 35 35 35 35
#> U68504 35 35 35 35 35 35 35 35 35 35
#> U68505 35 35 35 35 35 35 35 35 35 35
#> U68506 35 35 35 35 35 35 35 35 35 35
#> U68507 35 35 35 35 35 35 35 35 35 35
#> U68508 35 35 35 35 35 35 35 35 35 35
#> U68506 U68507 U68508
#> U68496 35 35 35
#> U68497 35 35 35
#> U68498 35 35 35
#> U68499 35 35 35
#> U68500 35 35 35
#> U68501 35 35 35
#> U68502 35 35 35
#> U68503 35 35 35
#> U68504 35 35 35
#> U68505 35 35 35
#> U68506 35 35 35
#> U68507 35 35 35
#> U68508 35 35 35
#>
#> $regionUsed
#> IRanges object with 3 ranges and 0 metadata columns:
#> start end width
#> <integer> <integer> <integer>
#> [1] 1 10 10
#> [2] 21 40 20
#> [3] 81 85 5
#>
## use asymmetric score matrix
myscore <- granthamMatrix()
myscore[5, 6] <- 0
h <- hiv |> cds2aa() |> aastring2dist(score=myscore, symmetric=FALSE)
#>
Computing: [========================================] 100% (done)
h$distSTRING[1:2, 1:2]
#> U68496 U68497
#> U68496 0.00000 3.802198
#> U68497 4.43956 0.000000