In [1]:
%load_ext rpy2.ipython

In [2]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
library(phyloseq)
library(vegan)
library(scatterplot3d)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: grid
Loading required package: permute
Loading required package: lattice
This is vegan 2.3-0

In [62]:
%%R
otu.tbl.file1 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/0/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'
otu.tbl.file2 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/100/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'

physeq1 = readRDS(otu.tbl.file1)
physeq2 = readRDS(otu.tbl.file2)

In [63]:
%%R

ord1 = ordinate(physeq1, method='NMDS', distance='bray')
#p1 = plot_ordination(physeq1, ord1, justDF=T)

ord2 = ordinate(physeq2, method='NMDS', distance='bray')
#p2 = plot_ordination(physeq2, ord2, justDF=T)

ord1 %>% head %>% print
ord2 %>% head %>% print


Square root transformation
Wisconsin double standardization
Run 0 stress 0.0426932 
Run 1 stress 0.05602875 
Run 2 stress 0.04784602 
Run 3 stress 0.05883678 
Run 4 stress 0.0433072 
Run 5 stress 0.05247778 
Run 6 stress 0.05137602 
Run 7 stress 0.05137587 
Run 8 stress 0.04724677 
Run 9 stress 0.05248773 
Run 10 stress 0.05928348 
Run 11 stress 0.04784435 
Run 12 stress 0.04330974 
Run 13 stress 0.05637131 
Run 14 stress 0.04269324 
... procrustes: rmse 8.383037e-05  max resid 0.0001682424 
*** Solution reached
Square root transformation
Wisconsin double standardization
Run 0 stress 0.06218492 
Run 1 stress 0.06218456 
... New best solution
... procrustes: rmse 6.735155e-05  max resid 0.0001562711 
*** Solution reached
$nobj
[1] 21

$nfix
[1] 0

$ndim
[1] 2

$ndis
[1] 210

$ngrp
[1] 1

$diss
  [1] 0.06947495 0.08977843 0.09788153 0.10235470 0.11139204 0.11162789
  [7] 0.12183417 0.13108392 0.13510692 0.14783645 0.15584220 0.17077085
 [13] 0.17090523 0.17283236 0.17593318 0.17914761 0.18146233 0.19062755
 [19] 0.19256452 0.19558722 0.19724572 0.19868233 0.20808890 0.20865214
 [25] 0.21239917 0.21753437 0.22609179 0.23450608 0.23761982 0.23974434
 [31] 0.24074697 0.24163471 0.24419270 0.24721401 0.25014112 0.25368968
 [37] 0.25479667 0.25739565 0.26195443 0.26995717 0.27033203 0.27466592
 [43] 0.27778029 0.28015993 0.28054972 0.28180240 0.29245155 0.29303025
 [49] 0.29644985 0.30143937 0.30922782 0.31456379 0.31516640 0.31630800
 [55] 0.31664058 0.31780672 0.31782853 0.31963750 0.32177793 0.33008613
 [61] 0.33035941 0.33061170 0.33274011 0.33692795 0.33725654 0.33890733
 [67] 0.34078111 0.34185927 0.34281214 0.34323496 0.34355141 0.34635538
 [73] 0.34647277 0.35067928 0.35315654 0.35695693 0.36078241 0.37087669
 [79] 0.37309281 0.37801738 0.37919376 0.38168707 0.38192000 0.38387555
 [85] 0.38389135 0.38906061 0.39041057 0.39410981 0.39479297 0.39907749
 [91] 0.40030544 0.40272143 0.40995637 0.41269088 0.41273362 0.41400201
 [97] 0.41491160 0.41915410 0.41954510 0.42347993 0.42690675 0.43714608
[103] 0.43852653 0.44423074 0.44647762 0.44666583 0.45309737 0.46115336
[109] 0.46995352 0.47515262 0.47651994 0.47734251 0.47852634 0.48135876
[115] 0.48376511 0.48672691 0.49665665 0.49889113 0.50115166 0.50175764
[121] 0.50549226 0.50931938 0.51168347 0.51347446 0.51642374 0.51720197
[127] 0.51859507 0.52114061 0.52326273 0.53716749 0.54782499 0.55578201
[133] 0.55991182 0.56147039 0.56271528 0.56452265 0.56503194 0.56878180
[139] 0.57403329 0.58161336 0.59031706 0.59078982 0.59688834 0.59797415
[145] 0.59830413 0.60374679 0.60593386 0.60612463 0.61320737 0.62513839
[151] 0.63906440 0.64325912 0.64787227 0.64943060 0.65285574 0.66488194
[157] 0.66573014 0.67456921 0.67532829 0.67740964 0.68424052 0.68682057
[163] 0.69190731 0.69648523 0.69730331 0.70560612 0.70809700 0.71039760
[169] 0.71061112 0.72465394 0.72873151 0.72975077 0.73471829 0.74146035
[175] 0.74160177 0.74191653 0.74723283 0.74918832 0.75100936 0.76689512
[181] 0.77242089 0.77627141 0.79363832 0.79473337 0.79741402 0.80342422
[187] 0.81251391 0.81613663 0.82486271 0.82620306 0.82683095 0.83381002
[193] 0.84936244 0.85321026 0.85388961 0.85619395 0.86314214 0.86824924
[199] 0.86878981 0.86914711 0.87187279 0.87283799 0.88690146 0.88761747
[205] 0.88990201 0.89366495 0.89562820 0.89851367 0.90850002 0.92134838

$nobj
[1] 19

$nfix
[1] 0

$ndim
[1] 2

$ndis
[1] 171

$ngrp
[1] 1

$diss
  [1] 0.1729370 0.1744309 0.1782342 0.1802090 0.2011150 0.2043980 0.2154959
  [8] 0.2270125 0.2552740 0.2574600 0.2609812 0.2732357 0.2740714 0.2781838
 [15] 0.2806657 0.2888832 0.3006292 0.3007999 0.3037792 0.3119973 0.3227287
 [22] 0.3324818 0.3434324 0.3440244 0.3481108 0.3499950 0.3538652 0.3603389
 [29] 0.3622985 0.3755064 0.3841882 0.3884146 0.3892476 0.3893932 0.3920163
 [36] 0.3924341 0.3930551 0.4025055 0.4060829 0.4102152 0.4138210 0.4253359
 [43] 0.4264019 0.4290116 0.4294240 0.4336282 0.4390729 0.4398058 0.4437058
 [50] 0.4457556 0.4472457 0.4680040 0.4714378 0.4717615 0.4842952 0.4896227
 [57] 0.4908623 0.4923380 0.4950572 0.4993226 0.5131259 0.5141705 0.5143093
 [64] 0.5166833 0.5285150 0.5381472 0.5456990 0.5478804 0.5489250 0.5542354
 [71] 0.5602948 0.5684118 0.5750894 0.5785246 0.5862985 0.5906261 0.5984535
 [78] 0.5990938 0.6044712 0.6046441 0.6052120 0.6187355 0.6198641 0.6348528
 [85] 0.6442265 0.6524278 0.6544872 0.6588947 0.6605723 0.6663246 0.6751714
 [92] 0.6780714 0.6868746 0.6873328 0.6915409 0.6947921 0.6958430 0.7008226
 [99] 0.7012985 0.7059088 0.7109209 0.7116497 0.7120122 0.7196315 0.7196587
[106] 0.7249641 0.7349590 0.7360939 0.7406608 0.7472021 0.7541109 0.7591506
[113] 0.7603504 0.7629092 0.7716825 0.7743737 0.7778442 0.7811788 0.7817023
[120] 0.7839609 0.7844262 0.7850641 0.7902717 0.7920296 0.7923304 0.7935315
[127] 0.7945968 0.7951849 0.7999599 0.8005030 0.8011001 0.8058018 0.8121191
[134] 0.8127574 0.8168747 0.8195636 0.8209675 0.8233717 0.8273166 0.8362277
[141] 0.8377723 0.8411864 0.8415680 0.8440274 0.8458915 0.8465028 0.8472639
[148] 0.8494916 0.8537718 0.8576168 0.8615900 0.8634211 0.8684427 0.8689283
[155] 0.8723969 0.8766570 0.8767021 0.8770816 0.8795455 0.8796904 0.8843561
[162] 0.8939932 0.8950610 0.9007356 0.9153310 0.9181907 0.9188883 0.9274667
[169] 0.9372479 0.9383127 0.9496005


In [69]:
%%R
otu.tbl = physeq1 %>% otu_table %>% t
bc1 = vegdist(otu.tbl)

bc1.col = data.frame(t(combn(rownames(otu.tbl),2)), as.numeric(bc1))
colnames(bc1.col) = c('X1', 'X2', 'dist')

bc1.col$X1.lib = gsub('__.+', '', bc1.col$X1)
bc1.col$X2.lib = gsub('__.+', '', bc1.col$X2)

bc1.col$X1.F.start = gsub('.+__([0-9.]+)-.+', '\\1', bc1.col$X1)
bc1.col$X2.F.start = gsub('.+__([0-9.]+)-.+', '\\1', bc1.col$X2)

bc1.col$X1.F.end = gsub('.+-', '', bc1.col$X1)
bc1.col$X2.F.end = gsub('.+-', '', bc1.col$X2)

bc1.col %>% head


              X1             X2      dist X1.lib X2.lib X1.F.start X2.F.start
1 1__1.708-1.712 1__1.712-1.717 0.4413102      1      1      1.708      1.712
2 1__1.708-1.712 1__1.717-1.719 0.6651426      1      1      1.708      1.717
3 1__1.708-1.712 1__1.719-1.725 0.8261074      1      1      1.708      1.719
4 1__1.708-1.712 1__1.725-1.729 0.9495660      1      1      1.708      1.725
5 1__1.708-1.712 1__1.729-1.732 0.9771378      1      1      1.708      1.729
6 1__1.708-1.712 1__1.732-1.737 0.9829276      1      1      1.708      1.732
  X1.F.end X2.F.end
1    1.712    1.717
2    1.712    1.719
3    1.712    1.725
4    1.712    1.729
5    1.712    1.732
6    1.712    1.737

In [70]:
%%R
otu.tbl = physeq2 %>% otu_table %>% t
bc2 = vegdist(otu.tbl)

bc2.col = data.frame(t(combn(rownames(otu.tbl),2)), as.numeric(bc2))
colnames(bc2.col) = c('X1', 'X2', 'dist')


bc2.col$X1.lib = gsub('__.+', '', bc2.col$X1)
bc2.col$X2.lib = gsub('__.+', '', bc2.col$X2)

bc2.col$X1.F.start = gsub('.+__([0-9.]+)-.+', '\\1', bc2.col$X1)
bc2.col$X2.F.start = gsub('.+__([0-9.]+)-.+', '\\1', bc2.col$X2)

bc2.col$X1.F.end = gsub('.+-', '', bc2.col$X1)
bc2.col$X2.F.end = gsub('.+-', '', bc2.col$X2)

bc2.col %>% head


              X1             X2      dist X1.lib X2.lib X1.F.start X2.F.start
1 1__1.710-1.716 1__1.716-1.718 0.4458798      1      1      1.710      1.716
2 1__1.710-1.716 1__1.718-1.724 0.6242224      1      1      1.710      1.718
3 1__1.710-1.716 1__1.724-1.730 0.9026234      1      1      1.710      1.724
4 1__1.710-1.716 1__1.730-1.736 0.9741819      1      1      1.710      1.730
5 1__1.710-1.716 1__1.736-1.739 0.9870906      1      1      1.710      1.736
6 1__1.710-1.716 1__1.739-1.742 0.9833607      1      1      1.710      1.739
  X1.F.end X2.F.end
1    1.716    1.718
2    1.716    1.724
3    1.716    1.730
4    1.716    1.736
5    1.716    1.739
6    1.716    1.742

In [93]:
%%R
bc1.col$file = 1
bc2.col$file = 2

asNum = function (x){ as.numeric(as.character(x)) }

tbl.j = rbind(bc1.col, bc2.col)
tbl.j = tbl.j %>%
    filter(X1.lib != X2.lib) %>% 
    mutate(X1.F.start = asNum(X1.F.start),
           X2.F.start = asNum(X2.F.start),
           F.start.dist = abs(X1.F.start - X2.F.start),
           dist = asNum(dist),
           file = as.character(file))

#scatterplot3d(tbl.j$X1.F.start, tbl.j$X2.F.start, tbl.j$dist, color=tbl.j$file)

In [98]:
%%R -w 500 -h 400

ggplot(tbl.j, aes(F.start.dist, dist, color=file)) +
    geom_point()


Mapping fractions between gradient communities in order to perform procrustes


In [99]:
%%R
otu.tbl.file1 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/0/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'
otu.tbl.file2 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/100/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'

physeq1 = readRDS(otu.tbl.file1)
physeq2 = readRDS(otu.tbl.file2)

In [103]:
%%R

ord1 = ordinate(physeq1, method='NMDS', distance='bray')
ord2 = ordinate(physeq2, method='NMDS', distance='bray')

ord1 %>% scores %>% head %>% print
ord2 %>% scores %>% head %>% print


Square root transformation
Wisconsin double standardization
Run 0 stress 0.0426932 
Run 1 stress 0.05688916 
Run 2 stress 0.04771449 
Run 3 stress 0.05452004 
Run 4 stress 0.05252225 
Run 5 stress 0.04269329 
... procrustes: rmse 0.001959693  max resid 0.004618999 
*** Solution reached
Square root transformation
Wisconsin double standardization
Run 0 stress 0.06218492 
Run 1 stress 0.06218277 
... New best solution
... procrustes: rmse 0.0004926153  max resid 0.00119587 
*** Solution reached
                    NMDS1      NMDS2
1__1.708-1.712 -1.5027386  0.3521671
1__1.712-1.717 -0.9376906 -0.0577772
1__1.717-1.719 -0.5779319 -0.2224296
1__1.719-1.725 -0.1941150 -0.2231443
1__1.725-1.729  0.1764679 -0.1675389
1__1.729-1.732  0.4510234 -0.1414083
                    NMDS1       NMDS2
1__1.710-1.716 -1.3836028 -0.43469829
1__1.716-1.718 -0.8252774 -0.32553372
1__1.718-1.724 -0.4793919 -0.09591122
1__1.724-1.730 -0.1106730  0.08921241
1__1.730-1.736  0.2081185  0.33438923
1__1.736-1.739  0.3291186  0.67220268

In [124]:
%%R

get.fracs = function(ord){
    fracs = gsub('.+__', '', rownames(ord %>% scores)) %>% as.data.frame()
    colnames(fracs) = c('fractions')
    fracs = fracs %>% 
        separate(fractions, c('start','end'), sep='-', convert=T) %>%
        mutate(start = start * 1000,
               end = end * 1000)
    return(fracs)
    }

ord1.f = get.fracs(ord1)
ord2.f = get.fracs(ord2)

In [125]:
%%R
library(IRanges)

In [130]:
%%R

ord1.r = IRanges(start=ord1.f$start, end=ord1.f$end)
ord2.r = IRanges(start=ord2.f$start, end=ord2.f$end)

In [139]:
%%R

ov = findOverlaps(ord1.r, ord2.r, select='first')
ov


 [1] 1 1 2 3 4 4 5 6 7 8 8 1 1 3 3 3 4 5 5 7 8

In [138]:
%%R

ov = findOverlaps(ord1.r, ord2.r)
ov


Hits of length 82
queryLength: 21
subjectLength: 19
    queryHits subjectHits 
     <integer>   <integer> 
 1           1           1 
 2           1          10 
 3           2           1 
 4           2           2 
 5           2          10 
 ...       ...         ... 
 78         21           8 
 79         21           9 
 80         21          17 
 81         21          18 
 82         21          19 

Calculating centroid of binned fraction samples

  • centroid of all 20 replicates for fraction samples that fall into the BD-range bin
  • trying oriellipse() function from vegan package

In [3]:
%%R
otu.tbl.file1 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/0/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'
otu.tbl.file2 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/100/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'

physeq1 = readRDS(otu.tbl.file1)
physeq2 = readRDS(otu.tbl.file2)

In [7]:
%%R

ord1 = ordinate(physeq1, method='NMDS', distance='bray')
ord2 = ordinate(physeq2, method='NMDS', distance='bray')


Square root transformation
Wisconsin double standardization
Run 0 stress 0.0426932 
Run 1 stress 0.05599419 
Run 2 stress 0.05364815 
Run 3 stress 0.04831451 
Run 4 stress 0.05759069 
Run 5 stress 0.0545324 
Run 6 stress 0.04330512 
Run 7 stress 0.04755724 
Run 8 stress 0.05767519 
Run 9 stress 0.05729747 
Run 10 stress 0.04330764 
Run 11 stress 0.04755629 
Run 12 stress 0.05482031 
Run 13 stress 0.04331283 
Run 14 stress 0.05137341 
Run 15 stress 0.05248201 
Run 16 stress 0.05740556 
Run 17 stress 0.05786267 
Run 18 stress 0.04771395 
Run 19 stress 0.0592138 
Run 20 stress 0.04269209 
... New best solution
... procrustes: rmse 0.001749721  max resid 0.004130339 
*** Solution reached
Square root transformation
Wisconsin double standardization
Run 0 stress 0.06218492 
Run 1 stress 0.06788541 
Run 2 stress 0.06430563 
Run 3 stress 0.06430264 
Run 4 stress 0.06218692 
... procrustes: rmse 0.0003201676  max resid 0.0007784938 
*** Solution reached
[1] 1

In [52]:
%%R

grps = as.character(rep(seq(1,nrow(ord1$points) / 2), 2))
grps = append(grps, '2')

plot(ord1, type = "p", display='sites')
elps = ordiellipse(ord1, grps, kind="se", conf=0.95, lwd=2, col="blue")

elps = elps %>% summary %>% t %>% as.data.frame
elps


          NMDS1       NMDS2         Area
1  -0.635357218  0.36677213 0.000000e+00
10  0.542324409  0.24938689          NaN
2  -0.766632793  0.13528520 8.365309e-01
3  -0.644489138 -0.17586293 0.000000e+00
4  -0.246709455 -0.23688196 2.433095e-10
5   0.008901491 -0.21533817 2.163852e-09
6   0.242886943 -0.17908837 1.858819e-09
7   0.487009997 -0.11097516          NaN
8   0.659751477  0.01597090 0.000000e+00
9   0.735630683  0.08308887 0.000000e+00

In [53]:
%%R

ggplot(elps, aes(NMDS1, NMDS2)) +
    geom_point()



In [64]:
%%R

get.ellipse = function(ord){
    grps = as.character(rep(seq(1,nrow(ord$points) / 2), 2))
    grps = append(grps, '2')
    
    plot(ord, type = "p", display='sites')
    elps = ordiellipse(ord, grps, kind="se", conf=0.95, lwd=2, col="blue")

    elps = elps %>% summary %>% t %>% as.data.frame
    return(elps)
    }


get.ellipse(ord1)


          NMDS1       NMDS2         Area
1  -0.635357218  0.36677213 0.000000e+00
10  0.542324409  0.24938689          NaN
2  -0.766632793  0.13528520 8.365309e-01
3  -0.644489138 -0.17586293 0.000000e+00
4  -0.246709455 -0.23688196 2.433095e-10
5   0.008901491 -0.21533817 2.163852e-09
6   0.242886943 -0.17908837 1.858819e-09
7   0.487009997 -0.11097516          NaN
8   0.659751477  0.01597090 0.000000e+00
9   0.735630683  0.08308887 0.000000e+00

In [86]:
%%R

mid = function(x, y){ (x + y)/2 }

get.BD.range = function(tbl){
    tbl = as.data.frame(tbl)
    tbl$lib = gsub('__.+', '', rownames(tbl)) %>% as.character
    tbl$BD.start = gsub('.+__([0-9.]+)-.+', '\\1', rownames(tbl)) %>% as.numeric
    tbl$BD.end = gsub('.+-', '', rownames(tbl)) %>% as.numeric
    tbl$BD.mid = mapply(mid, tbl$BD.start, tbl$BD.end)
    return(tbl)
    }

ord.BD = get.BD.range(ord1 %>% scores)
ord.BD %>% head


                    NMDS1      NMDS2 lib BD.start BD.end BD.mid
1__1.708-1.712 -1.5101237  0.3428264   1    1.708  1.712 1.7100
1__1.712-1.717 -0.9379486 -0.0516815   1    1.712  1.717 1.7145
1__1.717-1.719 -0.5784234 -0.2150748   1    1.717  1.719 1.7180
1__1.719-1.725 -0.1946565 -0.2175746   1    1.719  1.725 1.7220
1__1.725-1.729  0.1766000 -0.1668565   1    1.725  1.729 1.7270
1__1.729-1.732  0.4518703 -0.1430241   1    1.729  1.732 1.7305

In [87]:
%%R
# making fixed BD-range & binning by BD.mid

BD.range = seq(1.6, 1.9, 0.004)
BD.range


 [1] 1.600 1.604 1.608 1.612 1.616 1.620 1.624 1.628 1.632 1.636 1.640 1.644
[13] 1.648 1.652 1.656 1.660 1.664 1.668 1.672 1.676 1.680 1.684 1.688 1.692
[25] 1.696 1.700 1.704 1.708 1.712 1.716 1.720 1.724 1.728 1.732 1.736 1.740
[37] 1.744 1.748 1.752 1.756 1.760 1.764 1.768 1.772 1.776 1.780 1.784 1.788
[49] 1.792 1.796 1.800 1.804 1.808 1.812 1.816 1.820 1.824 1.828 1.832 1.836
[61] 1.840 1.844 1.848 1.852 1.856 1.860 1.864 1.868 1.872 1.876 1.880 1.884
[73] 1.888 1.892 1.896 1.900

In [ ]: