Making NMDS plots of Bray-Curtis values for each simulation

  • simulations from atomIncorp_taxaIncorp
  • plotting centroids of each replicate
    • gradient fraction samples between replicates are groups by uniform BD-range bins

In [1]:
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome1210/'
inFileDir = os.path.join(workDir, 'atomIncorp_taxaIncorp')
buildDir = os.path.join(workDir, 'atomIncorp_taxaIncorp_NMDS')
R_dir = '/home/nick/notebook/SIPSim/lib/R/'

Init


In [2]:
import glob
from os.path import abspath
import nestly
from IPython.display import Image, display

In [3]:
%load_ext rpy2.ipython

In [4]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
library(phyloseq)
library(vegan)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: grid
Loading required package: permute
Loading required package: lattice
This is vegan 2.3-0

In [112]:
if not os.path.isdir(buildDir):
    os.makedirs(buildDir)

Loading phyloseq objects


In [113]:
%%R -i inFileDir

files = list.files(inFileDir, pattern='OTU_n2_abs1e9_sub-norm_filt.physeq', recursive=T)
dirs = gsub('/[0-9]+/[^/]+$','', files) %>% unique %>% as.data.frame
colnames(dirs) = c('dirs')


percTaxa.to.keep = c(1,10,25,50)

tbl = dirs %>%
    separate(dirs, c('percIncorp','percTaxa'), sep='/', remove=F) %>%
    filter(percTaxa %in% percTaxa.to.keep)

tbl %>% nrow %>% print
tbl %>% head


[1] 16
    dirs percIncorp percTaxa
1   0/10          0       10
2    0/1          0        1
3   0/25          0       25
4   0/50          0       50
5 100/10        100       10
6  100/1        100        1

functions

  • loading files
  • joining OTU tables

In [165]:
%%R -i inFileDir

get.files = function(dir, base.path, pattern){
    dir = as.character(dir)
    p = paste(c(base.path, dir), collapse='/')
    files = list.files(p, pattern=pattern, recursive=T)
    p = gsub('$', '/', p)
    files = gsub('^', p, files)
    return(files)
    }


get.otu.tbls = function(files){
    tbl.l = list()    
    for(f in files){
        physeq = readRDS(f)
    
        id = gsub('/[^/]+$', '', f)
        id = gsub('.+atomIncorp_taxaIncorp/', '', id)
        id = paste(c(id,'__'), collapse='')
        otu.tbl = physeq %>% otu_table %>% as.data.frame
        colnames(otu.tbl) = gsub('^', id, colnames(otu.tbl))
        otu.tbl$taxon = rownames(otu.tbl)
        tbl.l[[f]] = otu.tbl
        }
    return(tbl.l)
    }


join.otu.tbls = function(tbl.l){
    n = names(tbl.l)

    tbl = tbl.l[[n[1]]]
    for (i in seq(2:length(n))){
        n.i = n[i]
        tbl = left_join(tbl, tbl.l[[n.i]], c('taxon'='taxon'))
        }
    
    # edit table
    tbl[is.na(tbl)] = 0
    rownames(tbl) = tbl$taxon
    tbl$taxon = NULL
    
    # return
    return(tbl)
    }


make.meta = function(tbl, BD.range = seq(1.6, 1.9, 0.004)){
    meta = tbl %>% colnames %>% as.data.frame 
    colnames(meta) = c('X')
    meta = filter(meta, X != 'taxon')

    # midpoint function
    mid = function(x, y){ (x + y)/2 }

    # making fixed BD-range
    #BD.range = seq(1.6, 1.9, 0.004)

    #print(meta); break
    
    # edit metadata
    meta = meta %>%
        #separate(X, c('percIncorp','percTaxa','rep','library','BD_range'), sep='__', remove=F) %>%
        separate(X, c('tmp1', 'library', 'tmp2'), sep='__', remove=F) %>%
        separate(tmp1, c('percIncorp','percTaxa', 'rep'), sep='/') %>%
        mutate(tmp2 = gsub('.[xy]$', '', tmp2)) %>%
        separate(tmp2, c('BD.min', 'BD.max'), sep='-') %>%
        mutate(BD.min = as.numeric(BD.min),
               BD.max = as.numeric(BD.max),
               BD.mid = mapply(mid, BD.min, BD.max),   
               BD.bin = cut(BD.mid, breaks=BD.range)) %>%
        unite(groups, library, BD.bin, sep='__')
    return(meta)
    }
    

edit.ellipse = function(elps){
    # midpoint function
    mid = function(x, y){ (x + y)/2 }

    elps.tbl = elps %>% summary %>% t %>% as.data.frame
    elps.tbl$meta = rownames(elps.tbl)
    elps.tbl = elps.tbl %>%
        separate(meta, c('library','BD.range'), sep='__') %>%
        mutate(BD.range = gsub('^.', '', BD.range),
               BD.range = gsub('.$', '', BD.range)) %>%
        separate(BD.range, c('BD.min','BD.max'), sep=',', remove=F) %>%
        mutate(
            BD.min = as.numeric(BD.min),
            BD.max = as.numeric(BD.max),
            BD.mid = mapply(mid, BD.min, BD.max))
    }

In [166]:
%%R

# wrapper function
get.ellipses = function(x){
    dir = x['dirs']
    
    # test run of functions
    files = get.files(dir, base.path=inFileDir, pattern='OTU_n2_abs1e9_sub-norm_filt.physeq')
    otu.tbls = get.otu.tbls(files)
    otu.tbl = join.otu.tbls(otu.tbls)
    # making metadata
    meta = make.meta(otu.tbl, BD.range=seq(1.6, 2, 0.002))
    ord = metaMDS(otu.tbl %>% t)
    # getting ellipse
    plot(ord, type = "p", display='sites')
    elps = ordiellipse(ord, groups=meta$groups, kind="se", conf=0.95, lwd=2, col="blue")
    # edit ellipse file
    elps.tbl = edit.ellipse(elps)
    elps.tbl$percIncorp = x['percIncorp'] %>% as.vector
    elps.tbl$percTaxa = x['percTaxa'] %>% as.vector
    
    return(elps.tbl)
    }

Applying functions to all replicates of each set of params


In [167]:
%%R
elps.tbls = apply(tbl, 1, get.ellipses)


Square root transformation
Wisconsin double standardization
Run 0 stress 0.1790159 
Run 1 stress 0.1826813 
Run 2 stress 0.1905444 
Run 3 stress 0.1857775 
Run 4 stress 0.1870961 
Run 5 stress 0.1816663 
Run 6 stress 0.1859556 
Run 7 stress 0.1891756 
Run 8 stress 0.1913628 
Run 9 stress 0.1852682 
Run 10 stress 0.1815447 
Run 11 stress 0.1896646 
Run 12 stress 0.1829106 
Run 13 stress 0.1845297 
Run 14 stress 0.1881721 
Run 15 stress 0.1872477 
Run 16 stress 0.1833416 
Run 17 stress 0.1833737 
Run 18 stress 0.1809264 
Run 19 stress 0.1880924 
Run 20 stress 0.1904182 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.1926144 
Run 1 stress 0.4189866 
Run 2 stress 0.199258 
Run 3 stress 0.1968785 
Run 4 stress 0.1984816 
Run 5 stress 0.2010789 
Run 6 stress 0.1961535 
Run 7 stress 0.1987694 
Run 8 stress 0.1971745 
Run 9 stress 0.1940195 
Run 10 stress 0.2007577 
Run 11 stress 0.1949308 
Run 12 stress 0.19829 
Run 13 stress 0.1951333 
Run 14 stress 0.1945277 
Run 15 stress 0.195445 
Run 16 stress 0.197589 
Run 17 stress 0.1958001 
Run 18 stress 0.1977573 
Run 19 stress 0.1957068 
Run 20 stress 0.2023255 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2000706 
Run 1 stress 0.2026263 
Run 2 stress 0.2023299 
Run 3 stress 0.2012189 
Run 4 stress 0.2002259 
... procrustes: rmse 0.0251287  max resid 0.1147409 
Run 5 stress 0.2013633 
Run 6 stress 0.2016688 
Run 7 stress 0.1999629 
... New best solution
... procrustes: rmse 0.02605301  max resid 0.1418685 
Run 8 stress 0.201575 
Run 9 stress 0.2018953 
Run 10 stress 0.2022682 
Run 11 stress 0.2009635 
Run 12 stress 0.2000142 
... procrustes: rmse 0.02785366  max resid 0.1082478 
Run 13 stress 0.2044475 
Run 14 stress 0.2036841 
Run 15 stress 0.4189013 
Run 16 stress 0.1983235 
... New best solution
... procrustes: rmse 0.02437434  max resid 0.08328625 
Run 17 stress 0.2011757 
Run 18 stress 0.2025951 
Run 19 stress 0.2020738 
Run 20 stress 0.1998653 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2011797 
Run 1 stress 0.2027306 
Run 2 stress 0.2041396 
Run 3 stress 0.2081081 
Run 4 stress 0.2015614 
... procrustes: rmse 0.02029456  max resid 0.1258044 
Run 5 stress 0.2018227 
Run 6 stress 0.2031517 
Run 7 stress 0.2014601 
... procrustes: rmse 0.01986691  max resid 0.1193619 
Run 8 stress 0.2034117 
Run 9 stress 0.2031509 
Run 10 stress 0.2025818 
Run 11 stress 0.2050643 
Run 12 stress 0.2020343 
Run 13 stress 0.2018375 
Run 14 stress 0.2048858 
Run 15 stress 0.418996 
Run 16 stress 0.2017167 
Run 17 stress 0.2051474 
Run 18 stress 0.2024395 
Run 19 stress 0.2026861 
Run 20 stress 0.2013772 
... procrustes: rmse 0.01503062  max resid 0.1222977 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2778789 
Run 1 stress 0.2683635 
... New best solution
... procrustes: rmse 0.03920218  max resid 0.1557902 
Run 2 stress 0.2762629 
Run 3 stress 0.2660155 
... New best solution
... procrustes: rmse 0.03468573  max resid 0.1732854 
Run 4 stress 0.2746118 
Run 5 stress 0.2665009 
... procrustes: rmse 0.03504724  max resid 0.1868679 
Run 6 stress 0.2639792 
... New best solution
... procrustes: rmse 0.03573549  max resid 0.1869908 
Run 7 stress 0.2676936 
Run 8 stress 0.2683389 
Run 9 stress 0.2643395 
... procrustes: rmse 0.0312064  max resid 0.2026738 
Run 10 stress 0.2701555 
Run 11 stress 0.2644202 
... procrustes: rmse 0.03497134  max resid 0.1904078 
Run 12 stress 0.2655966 
Run 13 stress 0.2610281 
... New best solution
... procrustes: rmse 0.03342108  max resid 0.1920624 
Run 14 stress 0.2694683 
Run 15 stress 0.2720143 
Run 16 stress 0.2628386 
Run 17 stress 0.2681965 
Run 18 stress 0.2710333 
Run 19 stress 0.2742105 
Run 20 stress 0.2738975 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2245121 
Run 1 stress 0.2275129 
Run 2 stress 0.2260413 
Run 3 stress 0.2252968 
Run 4 stress 0.2236913 
... New best solution
... procrustes: rmse 0.03095977  max resid 0.1865105 
Run 5 stress 0.2284704 
Run 6 stress 0.2250129 
Run 7 stress 0.2254845 
Run 8 stress 0.2279135 
Run 9 stress 0.2275989 
Run 10 stress 0.2235594 
... New best solution
... procrustes: rmse 0.03323906  max resid 0.1646685 
Run 11 stress 0.2252045 
Run 12 stress 0.2256815 
Run 13 stress 0.2279042 
Run 14 stress 0.2254078 
Run 15 stress 0.2270166 
Run 16 stress 0.2270669 
Run 17 stress 0.2276757 
Run 18 stress 0.2287243 
Run 19 stress 0.2297979 
Run 20 stress 0.2315754 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.307817 
Run 1 stress 0.3077268 
... New best solution
... procrustes: rmse 0.03147815  max resid 0.1535973 
Run 2 stress 0.3099218 
Run 3 stress 0.3024218 
... New best solution
... procrustes: rmse 0.04063426  max resid 0.1398664 
Run 4 stress 0.308722 
Run 5 stress 0.3045955 
Run 6 stress 0.3141494 
Run 7 stress 0.302488 
... procrustes: rmse 0.02947025  max resid 0.1491228 
Run 8 stress 0.3070045 
Run 9 stress 0.2991939 
... New best solution
... procrustes: rmse 0.03531699  max resid 0.1542411 
Run 10 stress 0.3089018 
Run 11 stress 0.305673 
Run 12 stress 0.3032504 
Run 13 stress 0.3054579 
Run 14 stress 0.3080623 
Run 15 stress 0.3104215 
Run 16 stress 0.3117268 
Run 17 stress 0.3048184 
Run 18 stress 0.2971505 
... New best solution
... procrustes: rmse 0.02857545  max resid 0.1645126 
Run 19 stress 0.3142845 
Run 20 stress 0.2979478 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2977684 
Run 1 stress 0.3152073 
Run 2 stress 0.3059951 
Run 3 stress 0.3055432 
Run 4 stress 0.3021358 
Run 5 stress 0.317438 
Run 6 stress 0.3133509 
Run 7 stress 0.314594 
Run 8 stress 0.3144994 
Run 9 stress 0.3091444 
Run 10 stress 0.3062549 
Run 11 stress 0.3108559 
Run 12 stress 0.3182821 
Run 13 stress 0.3127271 
Run 14 stress 0.3117231 
Run 15 stress 0.3134472 
Run 16 stress 0.3176463 
Run 17 stress 0.4189508 
Run 18 stress 0.3169414 
Run 19 stress 0.3097598 
Run 20 stress 0.4189585 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2175042 
Run 1 stress 0.2171887 
... New best solution
... procrustes: rmse 0.02458901  max resid 0.1398951 
Run 2 stress 0.2201059 
Run 3 stress 0.2250702 
Run 4 stress 0.2199631 
Run 5 stress 0.2179218 
Run 6 stress 0.2229787 
Run 7 stress 0.2165832 
... New best solution
... procrustes: rmse 0.01676595  max resid 0.1245146 
Run 8 stress 0.2218961 
Run 9 stress 0.217447 
Run 10 stress 0.2211216 
Run 11 stress 0.2183106 
Run 12 stress 0.2182699 
Run 13 stress 0.2194017 
Run 14 stress 0.2171168 
Run 15 stress 0.2214954 
Run 16 stress 0.2207491 
Run 17 stress 0.2174462 
Run 18 stress 0.2185349 
Run 19 stress 0.2218827 
Run 20 stress 0.2179202 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2030314 
Run 1 stress 0.2027835 
... New best solution
... procrustes: rmse 0.01872686  max resid 0.1013906 
Run 2 stress 0.201681 
... New best solution
... procrustes: rmse 0.02833396  max resid 0.1375546 
Run 3 stress 0.2020347 
... procrustes: rmse 0.01107319  max resid 0.1180968 
Run 4 stress 0.2041156 
Run 5 stress 0.204162 
Run 6 stress 0.2029962 
Run 7 stress 0.2024077 
Run 8 stress 0.2026982 
Run 9 stress 0.2046391 
Run 10 stress 0.2022102 
Run 11 stress 0.2015739 
... New best solution
... procrustes: rmse 0.02336402  max resid 0.1434586 
Run 12 stress 0.2014856 
... New best solution
... procrustes: rmse 0.009647457  max resid 0.09737341 
Run 13 stress 0.2050764 
Run 14 stress 0.2026347 
Run 15 stress 0.4189965 
Run 16 stress 0.2008764 
... New best solution
... procrustes: rmse 0.01165942  max resid 0.1113377 
Run 17 stress 0.2037579 
Run 18 stress 0.2024411 
Run 19 stress 0.2040616 
Run 20 stress 0.2022927 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2181834 
Run 1 stress 0.2245929 
Run 2 stress 0.2212309 
Run 3 stress 0.2207496 
Run 4 stress 0.2198986 
Run 5 stress 0.2213678 
Run 6 stress 0.2173055 
... New best solution
... procrustes: rmse 0.009518584  max resid 0.08241352 
Run 7 stress 0.2193386 
Run 8 stress 0.2256982 
Run 9 stress 0.418944 
Run 10 stress 0.2181331 
Run 11 stress 0.2199544 
Run 12 stress 0.4189499 
Run 13 stress 0.2241071 
Run 14 stress 0.2206238 
Run 15 stress 0.2261741 
Run 16 stress 0.2202512 
Run 17 stress 0.2255324 
Run 18 stress 0.2170376 
... New best solution
... procrustes: rmse 0.00725791  max resid 0.08386704 
Run 19 stress 0.2309696 
Run 20 stress 0.2213828 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.213505 
Run 1 stress 0.2146395 
Run 2 stress 0.2158451 
Run 3 stress 0.4188531 
Run 4 stress 0.2162048 
Run 5 stress 0.2143637 
Run 6 stress 0.2129681 
... New best solution
... procrustes: rmse 0.01970531  max resid 0.1414625 
Run 7 stress 0.2169088 
Run 8 stress 0.217129 
Run 9 stress 0.214608 
Run 10 stress 0.2155086 
Run 11 stress 0.2130278 
... procrustes: rmse 0.01639995  max resid 0.143761 
Run 12 stress 0.2154133 
Run 13 stress 0.2163192 
Run 14 stress 0.2136369 
Run 15 stress 0.2140566 
Run 16 stress 0.2201632 
Run 17 stress 0.2168475 
Run 18 stress 0.2132374 
... procrustes: rmse 0.01898704  max resid 0.1427878 
Run 19 stress 0.2160973 
Run 20 stress 0.2151396 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2445091 
Run 1 stress 0.2390363 
... New best solution
... procrustes: rmse 0.03618717  max resid 0.1622581 
Run 2 stress 0.2390225 
... New best solution
... procrustes: rmse 0.02675696  max resid 0.1811692 
Run 3 stress 0.244366 
Run 4 stress 0.2469441 
Run 5 stress 0.2464644 
Run 6 stress 0.2423661 
Run 7 stress 0.2444409 
Run 8 stress 0.2438853 
Run 9 stress 0.2396306 
Run 10 stress 0.2444932 
Run 11 stress 0.2460162 
Run 12 stress 0.2418131 
Run 13 stress 0.2435337 
Run 14 stress 0.2457756 
Run 15 stress 0.2461296 
Run 16 stress 0.2424575 
Run 17 stress 0.2435973 
Run 18 stress 0.2439488 
Run 19 stress 0.2367106 
... New best solution
... procrustes: rmse 0.02716894  max resid 0.1589136 
Run 20 stress 0.2435418 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2126107 
Run 1 stress 0.2166608 
Run 2 stress 0.2161175 
Run 3 stress 0.2177959 
Run 4 stress 0.2131232 
Run 5 stress 0.2118112 
... New best solution
... procrustes: rmse 0.01932961  max resid 0.1604795 
Run 6 stress 0.2143053 
Run 7 stress 0.2172715 
Run 8 stress 0.213649 
Run 9 stress 0.2155582 
Run 10 stress 0.2259611 
Run 11 stress 0.2176317 
Run 12 stress 0.2156092 
Run 13 stress 0.2216555 
Run 14 stress 0.2151244 
Run 15 stress 0.2134828 
Run 16 stress 0.2183714 
Run 17 stress 0.2150234 
Run 18 stress 0.2144987 
Run 19 stress 0.2168899 
Run 20 stress 0.2116765 
... New best solution
... procrustes: rmse 0.00721256  max resid 0.09556044 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2625669 
Run 1 stress 0.2638941 
Run 2 stress 0.2644776 
Run 3 stress 0.2699065 
Run 4 stress 0.266605 
Run 5 stress 0.2678898 
Run 6 stress 0.2665244 
Run 7 stress 0.2616436 
... New best solution
... procrustes: rmse 0.02230922  max resid 0.1555541 
Run 8 stress 0.2608355 
... New best solution
... procrustes: rmse 0.02048264  max resid 0.1047469 
Run 9 stress 0.2712026 
Run 10 stress 0.2620757 
Run 11 stress 0.2667268 
Run 12 stress 0.2623281 
Run 13 stress 0.2688867 
Run 14 stress 0.2685515 
Run 15 stress 0.265638 
Run 16 stress 0.2709352 
Run 17 stress 0.2675698 
Run 18 stress 0.2687303 
Run 19 stress 0.2654582 
Run 20 stress 0.273199 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.255568 
Run 1 stress 0.2606083 
Run 2 stress 0.2636984 
Run 3 stress 0.2562182 
Run 4 stress 0.2650849 
Run 5 stress 0.2614314 
Run 6 stress 0.2554093 
... New best solution
... procrustes: rmse 0.02145027  max resid 0.1044858 
Run 7 stress 0.2555421 
... procrustes: rmse 0.02356887  max resid 0.1186675 
Run 8 stress 0.2656055 
Run 9 stress 0.2586464 
Run 10 stress 0.2587184 
Run 11 stress 0.2597864 
Run 12 stress 0.2665255 
Run 13 stress 0.2650768 
Run 14 stress 0.2549842 
... New best solution
... procrustes: rmse 0.02163776  max resid 0.1025825 
Run 15 stress 0.2622467 
Run 16 stress 0.2586789 
Run 17 stress 0.2571542 
Run 18 stress 0.258006 
Run 19 stress 0.2555882 
Run 20 stress 0.2629609