This function calculates pairwise distances for all combinations of sequences in an AAStringSet.

aastring2dist(
  aa,
  threads = 1,
  symmetric = TRUE,
  score = NULL,
  mask = NULL,
  region = NULL
)

Arguments

aa

AAStringSet [mandatory]

threads

number of parallel threads [default: 1]

symmetric

whether the score matrix is symmetric; set to FALSE when using an asymmetric score matrix [default: TRUE]

score

score matrix used to calculate distances, e.g. granthamMatrix() [mandatory]

mask

IRanges object indicating masked sites [default: NULL]

region

IRanges object indicating region to use for dist calculation (by default all sites are used) [default: NULL]

Value

A list containing the pairwise distance values (distSTRING), the number of sites used for each pair (sitesUsed), and the region used as an IRanges object (regionUsed)

See also

Author

Kristian K Ullrich

Examples

## load example sequence data
data("hiv", package="MSA2dist")
#aastring2dist(cds2aa(hiv), score=granthamMatrix())
hiv |> cds2aa() |> aastring2dist(score=granthamMatrix())
#> 
Computing: [========================================] 100% (done)                         
#> $distSTRING
#>           U68496   U68497    U68498    U68499    U68500   U68501   U68502
#> U68496  0.000000  4.43956 11.516484  9.879121 13.098901 17.05495 19.71429
#> U68497  4.439560  0.00000 12.681319 10.406593 13.626374 18.21978 19.35165
#> U68498 11.516484 12.68132  0.000000  8.769231  8.571429 13.25275 16.23077
#> U68499  9.879121 10.40659  8.769231  0.000000  4.780220 15.58242 15.21978
#> U68500 13.098901 13.62637  8.571429  4.780220  0.000000 15.38462 14.35165
#> U68501 17.054945 18.21978 13.252747 15.582418 15.384615  0.00000 16.43956
#> U68502 19.714286 19.35165 16.230769 15.219780 14.351648 16.43956  0.00000
#> U68503 17.824176 19.80220 16.153846 16.582418 14.956044 17.50549 15.98901
#> U68504 13.857143 13.89011  9.516484 10.483516 10.571429 10.75824 11.80220
#> U68505 14.241758 14.27473 10.274725 12.472527 12.560440 13.08791 12.92308
#> U68506 14.923077 14.82418 11.043956 12.010989 12.065934 11.78022 12.87912
#> U68507 18.010989 18.54945 12.153846 14.241758 13.846154 14.91209 14.47253
#> U68508 19.010989 18.90110 15.560440 13.527473 10.890110 13.86813 13.83516
#>          U68503    U68504    U68505    U68506    U68507   U68508
#> U68496 17.82418 13.857143 14.241758 14.923077 18.010989 19.01099
#> U68497 19.80220 13.890110 14.274725 14.824176 18.549451 18.90110
#> U68498 16.15385  9.516484 10.274725 11.043956 12.153846 15.56044
#> U68499 16.58242 10.483516 12.472527 12.010989 14.241758 13.52747
#> U68500 14.95604 10.571429 12.560440 12.065934 13.846154 10.89011
#> U68501 17.50549 10.758242 13.087912 11.780220 14.912088 13.86813
#> U68502 15.98901 11.802198 12.923077 12.879121 14.472527 13.83516
#> U68503  0.00000 11.384615 13.494505 12.912088 14.604396 13.69231
#> U68504 11.38462  0.000000  2.439560  1.813187  5.186813 10.27473
#> U68505 13.49451  2.439560  0.000000  4.252747  6.098901 12.24176
#> U68506 12.91209  1.813187  4.252747  0.000000  4.340659 11.80220
#> U68507 14.60440  5.186813  6.098901  4.340659  0.000000 13.49451
#> U68508 13.69231 10.274725 12.241758 11.802198 13.494505  0.00000
#> 
#> $sitesUsed
#>        U68496 U68497 U68498 U68499 U68500 U68501 U68502 U68503 U68504 U68505
#> U68496     91     91     91     91     91     91     91     91     91     91
#> U68497     91     91     91     91     91     91     91     91     91     91
#> U68498     91     91     91     91     91     91     91     91     91     91
#> U68499     91     91     91     91     91     91     91     91     91     91
#> U68500     91     91     91     91     91     91     91     91     91     91
#> U68501     91     91     91     91     91     91     91     91     91     91
#> U68502     91     91     91     91     91     91     91     91     91     91
#> U68503     91     91     91     91     91     91     91     91     91     91
#> U68504     91     91     91     91     91     91     91     91     91     91
#> U68505     91     91     91     91     91     91     91     91     91     91
#> U68506     91     91     91     91     91     91     91     91     91     91
#> U68507     91     91     91     91     91     91     91     91     91     91
#> U68508     91     91     91     91     91     91     91     91     91     91
#>        U68506 U68507 U68508
#> U68496     91     91     91
#> U68497     91     91     91
#> U68498     91     91     91
#> U68499     91     91     91
#> U68500     91     91     91
#> U68501     91     91     91
#> U68502     91     91     91
#> U68503     91     91     91
#> U68504     91     91     91
#> U68505     91     91     91
#> U68506     91     91     91
#> U68507     91     91     91
#> U68508     91     91     91
#> 
#> $regionUsed
#> IRanges object with 1 range and 0 metadata columns:
#>           start       end     width
#>       <integer> <integer> <integer>
#>   [1]         1        91        91
#> 
## create mask
mask1 <- IRanges::IRanges(start=c(11,41,71), end=c(20,50,80))
## use mask
hiv |> cds2aa() |> aastring2dist(score=granthamMatrix(), mask=mask1)
#> 
Computing: [========================================] 100% (done)                         
#> $distSTRING
#>           U68496    U68497    U68498    U68499    U68500   U68501   U68502
#> U68496  0.000000  4.344262 15.442623 13.000000 17.409836 19.18033 22.37705
#> U68497  4.344262  0.000000 14.901639 11.508197 15.918033 18.63934 21.93443
#> U68498 15.442623 14.901639  0.000000 13.081967 12.393443 14.52459 18.19672
#> U68499 13.000000 11.508197 13.081967  0.000000  6.737705 18.00000 16.68852
#> U68500 17.409836 15.918033 12.393443  6.737705  0.000000 17.31148 15.00000
#> U68501 19.180328 18.639344 14.524590 18.000000 17.311475  0.00000 18.19672
#> U68502 22.377049 21.934426 18.196721 16.688525 15.000000 18.19672  0.00000
#> U68503 21.426230 22.098361 17.196721 17.836066 15.016393 18.90164 18.90164
#> U68504 15.393443 13.901639  9.934426 11.377049 11.114754 11.47541 14.70492
#> U68505 15.967213 14.475410 11.065574 14.344262 14.081967 14.95082 16.37705
#> U68506 16.540984 15.049180 11.770492 13.213115 12.901639 12.55738 15.65574
#> U68507 19.098361 18.557377 11.377049 14.491803 13.508197 15.18033 15.98361
#> U68508 22.737705 20.295082 18.885246 15.852459 11.524590 15.77049 16.73770
#>          U68503    U68504    U68505    U68506    U68507   U68508
#> U68496 21.42623 15.393443 15.967213 16.540984 19.098361 22.73770
#> U68497 22.09836 13.901639 14.475410 15.049180 18.557377 20.29508
#> U68498 17.19672  9.934426 11.065574 11.770492 11.377049 18.88525
#> U68499 17.83607 11.377049 14.344262 13.213115 14.491803 15.85246
#> U68500 15.01639 11.114754 14.081967 12.901639 13.508197 11.52459
#> U68501 18.90164 11.475410 14.950820 12.557377 15.180328 15.77049
#> U68502 18.90164 14.704918 16.377049 15.655738 15.983607 16.73770
#> U68503  0.00000 11.688525 14.836066 13.524590 14.000000 14.78689
#> U68504 11.68852  0.000000  3.639344  1.836066  4.819672 14.03279
#> U68505 14.83607  3.639344  0.000000  5.475410  6.180328 16.96721
#> U68506 13.52459  1.836066  5.475410  0.000000  4.426230 15.86885
#> U68507 14.00000  4.819672  6.180328  4.426230  0.000000 16.34426
#> U68508 14.78689 14.032787 16.967213 15.868852 16.344262  0.00000
#> 
#> $sitesUsed
#>        U68496 U68497 U68498 U68499 U68500 U68501 U68502 U68503 U68504 U68505
#> U68496     61     61     61     61     61     61     61     61     61     61
#> U68497     61     61     61     61     61     61     61     61     61     61
#> U68498     61     61     61     61     61     61     61     61     61     61
#> U68499     61     61     61     61     61     61     61     61     61     61
#> U68500     61     61     61     61     61     61     61     61     61     61
#> U68501     61     61     61     61     61     61     61     61     61     61
#> U68502     61     61     61     61     61     61     61     61     61     61
#> U68503     61     61     61     61     61     61     61     61     61     61
#> U68504     61     61     61     61     61     61     61     61     61     61
#> U68505     61     61     61     61     61     61     61     61     61     61
#> U68506     61     61     61     61     61     61     61     61     61     61
#> U68507     61     61     61     61     61     61     61     61     61     61
#> U68508     61     61     61     61     61     61     61     61     61     61
#>        U68506 U68507 U68508
#> U68496     61     61     61
#> U68497     61     61     61
#> U68498     61     61     61
#> U68499     61     61     61
#> U68500     61     61     61
#> U68501     61     61     61
#> U68502     61     61     61
#> U68503     61     61     61
#> U68504     61     61     61
#> U68505     61     61     61
#> U68506     61     61     61
#> U68507     61     61     61
#> U68508     61     61     61
#> 
#> $regionUsed
#> IRanges object with 4 ranges and 0 metadata columns:
#>           start       end     width
#>       <integer> <integer> <integer>
#>   [1]         1        10        10
#>   [2]        21        40        20
#>   [3]        51        70        20
#>   [4]        81        91        11
#> 
## use region
region1 <- IRanges::IRanges(start=c(1,75), end=c(45,85))
hiv |> cds2aa() |> aastring2dist(score=granthamMatrix(), region=region1)
#> 
Computing: [========================================] 100% (done)                         
#> $distSTRING
#>           U68496    U68497    U68498    U68499    U68500   U68501   U68502
#> U68496  0.000000  5.089286  8.589286  8.875000 11.410714 12.53571 15.91071
#> U68497  5.089286  0.000000  8.357143  7.607143 10.142857 12.30357 14.76786
#> U68498  8.589286  8.357143  0.000000  8.160714 10.750000 12.05357 16.28571
#> U68499  8.875000  7.607143  8.160714  0.000000  3.428571 11.96429 10.78571
#> U68500 11.410714 10.142857 10.750000  3.428571  0.000000 14.55357 12.28571
#> U68501 12.535714 12.303571 12.053571 11.964286 14.553571  0.00000 19.66071
#> U68502 15.910714 14.767857 16.285714 10.785714 12.285714 19.66071  0.00000
#> U68503 13.303571 14.553571 14.785714 10.821429 10.625000 17.19643 15.05357
#> U68504  7.375000  5.303571  6.303571  5.214286  7.803571 10.46429 13.32143
#> U68505  8.964286  6.892857  5.589286  6.446429  9.035714 12.05357 14.55357
#> U68506  8.696429  6.410714  8.375000  7.285714  9.821429 12.53571 14.66071
#> U68507 13.178571 11.928571  9.642857 10.375000 12.178571 16.26786 16.71429
#> U68508 13.107143 11.839286 14.464286  9.250000  9.125000 12.58929 14.96429
#>          U68503    U68504    U68505    U68506    U68507    U68508
#> U68496 13.30357  7.375000  8.964286  8.696429 13.178571 13.107143
#> U68497 14.55357  5.303571  6.892857  6.410714 11.928571 11.839286
#> U68498 14.78571  6.303571  5.589286  8.375000  9.642857 14.464286
#> U68499 10.82143  5.214286  6.446429  7.285714 10.375000  9.250000
#> U68500 10.62500  7.803571  9.035714  9.821429 12.178571  9.125000
#> U68501 17.19643 10.464286 12.053571 12.535714 16.267857 12.589286
#> U68502 15.05357 13.321429 14.553571 14.660714 16.714286 14.964286
#> U68503  0.00000 13.089286 14.321429 15.160714 17.375000 11.500000
#> U68504 13.08929  0.000000  1.589286  2.535714  7.303571  9.767857
#> U68505 14.32143  1.589286  0.000000  4.125000  6.589286 11.000000
#> U68506 15.16071  2.535714  4.125000  0.000000  5.517857 11.839286
#> U68507 17.37500  7.303571  6.589286  5.517857  0.000000 14.053571
#> U68508 11.50000  9.767857 11.000000 11.839286 14.053571  0.000000
#> 
#> $sitesUsed
#>        U68496 U68497 U68498 U68499 U68500 U68501 U68502 U68503 U68504 U68505
#> U68496     56     56     56     56     56     56     56     56     56     56
#> U68497     56     56     56     56     56     56     56     56     56     56
#> U68498     56     56     56     56     56     56     56     56     56     56
#> U68499     56     56     56     56     56     56     56     56     56     56
#> U68500     56     56     56     56     56     56     56     56     56     56
#> U68501     56     56     56     56     56     56     56     56     56     56
#> U68502     56     56     56     56     56     56     56     56     56     56
#> U68503     56     56     56     56     56     56     56     56     56     56
#> U68504     56     56     56     56     56     56     56     56     56     56
#> U68505     56     56     56     56     56     56     56     56     56     56
#> U68506     56     56     56     56     56     56     56     56     56     56
#> U68507     56     56     56     56     56     56     56     56     56     56
#> U68508     56     56     56     56     56     56     56     56     56     56
#>        U68506 U68507 U68508
#> U68496     56     56     56
#> U68497     56     56     56
#> U68498     56     56     56
#> U68499     56     56     56
#> U68500     56     56     56
#> U68501     56     56     56
#> U68502     56     56     56
#> U68503     56     56     56
#> U68504     56     56     56
#> U68505     56     56     56
#> U68506     56     56     56
#> U68507     56     56     56
#> U68508     56     56     56
#> 
#> $regionUsed
#> IRanges object with 2 ranges and 0 metadata columns:
#>           start       end     width
#>       <integer> <integer> <integer>
#>   [1]         1        45        45
#>   [2]        75        85        11
#> 
## use mask and region
hiv |> cds2aa() |> aastring2dist(score=granthamMatrix(),
    mask=mask1, region=region1)
#> 
Computing: [========================================] 100% (done)                         
#> $distSTRING
#>           U68496    U68497    U68498    U68499   U68500   U68501   U68502
#> U68496  0.000000  5.085714 13.000000 13.457143 16.82857 18.54286 20.08571
#> U68497  5.085714  0.000000  9.571429  8.371429 11.74286 15.11429 19.34286
#> U68498 13.000000  9.571429  0.000000 13.057143 16.51429 18.51429 21.42857
#> U68499 13.457143  8.371429 13.057143  0.000000  4.80000 18.37143 12.62857
#> U68500 16.828571 11.742857 16.514286  4.800000  0.00000 21.82857 14.34286
#> U68501 18.542857 15.114286 18.514286 18.371429 21.82857  0.00000 26.05714
#> U68502 20.085714 19.342857 21.428571 12.628571 14.34286 26.05714  0.00000
#> U68503 16.771429 15.714286 18.400000 12.057143 11.05714 21.48571 17.85714
#> U68504  9.485714  4.400000  8.514286  6.771429 10.22857 14.40000 16.25714
#> U68505 12.028571  6.942857  7.371429  8.742857 12.20000 16.94286 18.22857
#> U68506 10.828571  5.742857 11.057143  9.314286 12.68571 16.94286 17.25714
#> U68507 14.428571 11.000000  9.514286 10.685714 12.88571 19.34286 16.97143
#> U68508 19.485714 14.400000 22.400000 14.057143 13.17143 18.62857 18.57143
#>          U68503    U68504    U68505    U68506    U68507   U68508
#> U68496 16.77143  9.485714 12.028571 10.828571 14.428571 19.48571
#> U68497 15.71429  4.400000  6.942857  5.742857 11.000000 14.40000
#> U68498 18.40000  8.514286  7.371429 11.057143  9.514286 22.40000
#> U68499 12.05714  6.771429  8.742857  9.314286 10.685714 14.05714
#> U68500 11.05714 10.228571 12.200000 12.685714 12.885714 13.17143
#> U68501 21.48571 14.400000 16.942857 16.942857 19.342857 18.62857
#> U68502 17.85714 16.257143 18.228571 17.257143 16.971429 18.57143
#> U68503  0.00000 14.114286 16.085714 16.657143 16.628571 12.40000
#> U68504 14.11429  0.000000  2.542857  2.542857  6.600000 14.80000
#> U68505 16.08571  2.542857  0.000000  5.085714  5.457143 16.77143
#> U68506 16.65714  2.542857  5.085714  0.000000  5.257143 17.34286
#> U68507 16.62857  6.600000  5.457143  5.257143  0.000000 17.31429
#> U68508 12.40000 14.800000 16.771429 17.342857 17.314286  0.00000
#> 
#> $sitesUsed
#>        U68496 U68497 U68498 U68499 U68500 U68501 U68502 U68503 U68504 U68505
#> U68496     35     35     35     35     35     35     35     35     35     35
#> U68497     35     35     35     35     35     35     35     35     35     35
#> U68498     35     35     35     35     35     35     35     35     35     35
#> U68499     35     35     35     35     35     35     35     35     35     35
#> U68500     35     35     35     35     35     35     35     35     35     35
#> U68501     35     35     35     35     35     35     35     35     35     35
#> U68502     35     35     35     35     35     35     35     35     35     35
#> U68503     35     35     35     35     35     35     35     35     35     35
#> U68504     35     35     35     35     35     35     35     35     35     35
#> U68505     35     35     35     35     35     35     35     35     35     35
#> U68506     35     35     35     35     35     35     35     35     35     35
#> U68507     35     35     35     35     35     35     35     35     35     35
#> U68508     35     35     35     35     35     35     35     35     35     35
#>        U68506 U68507 U68508
#> U68496     35     35     35
#> U68497     35     35     35
#> U68498     35     35     35
#> U68499     35     35     35
#> U68500     35     35     35
#> U68501     35     35     35
#> U68502     35     35     35
#> U68503     35     35     35
#> U68504     35     35     35
#> U68505     35     35     35
#> U68506     35     35     35
#> U68507     35     35     35
#> U68508     35     35     35
#> 
#> $regionUsed
#> IRanges object with 3 ranges and 0 metadata columns:
#>           start       end     width
#>       <integer> <integer> <integer>
#>   [1]         1        10        10
#>   [2]        21        40        20
#>   [3]        81        85         5
#> 
## use asymmetric score matrix
myscore <- granthamMatrix()
myscore[5, 6] <- 0
h <- hiv |> cds2aa() |> aastring2dist(score=myscore, symmetric=FALSE)
#> 
Computing: [========================================] 100% (done)                         
h$distSTRING[1:2, 1:2]
#>         U68496   U68497
#> U68496 0.00000 3.802198
#> U68497 4.43956 0.000000