Making NMDS plots of Bray-Curtis values for each simulation

  • simulations from atomIncorp_taxaIncorp
  • plotting centroids of each replicate
    • gradient fraction samples between replicates are groups by uniform BD-range bins

In [1]:
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome1210/'
inFileDir = os.path.join(workDir, 'atomIncorp_taxaIncorp')
buildDir = os.path.join(workDir, 'atomIncorp_taxaIncorp_NMDS')
R_dir = '/home/nick/notebook/SIPSim/lib/R/'

Init


In [2]:
import glob
from os.path import abspath
import nestly
from IPython.display import Image, display

In [3]:
%load_ext rpy2.ipython

In [4]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
library(phyloseq)
library(vegan)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: grid
Loading required package: permute
Loading required package: lattice
This is vegan 2.3-0

In [112]:
if not os.path.isdir(buildDir):
    os.makedirs(buildDir)

Loading phyloseq objects


In [113]:
%%R -i inFileDir

files = list.files(inFileDir, pattern='OTU_n2_abs1e9_sub-norm_filt.physeq', recursive=T)
dirs = gsub('/[0-9]+/[^/]+$','', files) %>% unique %>% as.data.frame
colnames(dirs) = c('dirs')


percTaxa.to.keep = c(1,10,25,50)

tbl = dirs %>%
    separate(dirs, c('percIncorp','percTaxa'), sep='/', remove=F) %>%
    filter(percTaxa %in% percTaxa.to.keep)

tbl %>% nrow %>% print
tbl %>% head


[1] 16
    dirs percIncorp percTaxa
1   0/10          0       10
2    0/1          0        1
3   0/25          0       25
4   0/50          0       50
5 100/10        100       10
6  100/1        100        1

functions

  • loading files
  • joining OTU tables

In [165]:
%%R -i inFileDir

get.files = function(dir, base.path, pattern){
    dir = as.character(dir)
    p = paste(c(base.path, dir), collapse='/')
    files = list.files(p, pattern=pattern, recursive=T)
    p = gsub('$', '/', p)
    files = gsub('^', p, files)
    return(files)
    }


get.otu.tbls = function(files){
    tbl.l = list()    
    for(f in files){
        physeq = readRDS(f)
    
        id = gsub('/[^/]+$', '', f)
        id = gsub('.+atomIncorp_taxaIncorp/', '', id)
        id = paste(c(id,'__'), collapse='')
        otu.tbl = physeq %>% otu_table %>% as.data.frame
        colnames(otu.tbl) = gsub('^', id, colnames(otu.tbl))
        otu.tbl$taxon = rownames(otu.tbl)
        tbl.l[[f]] = otu.tbl
        }
    return(tbl.l)
    }


join.otu.tbls = function(tbl.l){
    n = names(tbl.l)

    tbl = tbl.l[[n[1]]]
    for (i in seq(2:length(n))){
        n.i = n[i]
        tbl = left_join(tbl, tbl.l[[n.i]], c('taxon'='taxon'))
        }
    
    # edit table
    tbl[is.na(tbl)] = 0
    rownames(tbl) = tbl$taxon
    tbl$taxon = NULL
    
    # return
    return(tbl)
    }


make.meta = function(tbl, BD.range = seq(1.6, 1.9, 0.004)){
    meta = tbl %>% colnames %>% as.data.frame 
    colnames(meta) = c('X')
    meta = filter(meta, X != 'taxon')

    # midpoint function
    mid = function(x, y){ (x + y)/2 }

    # making fixed BD-range
    #BD.range = seq(1.6, 1.9, 0.004)

    #print(meta); break
    
    # edit metadata
    meta = meta %>%
        #separate(X, c('percIncorp','percTaxa','rep','library','BD_range'), sep='__', remove=F) %>%
        separate(X, c('tmp1', 'library', 'tmp2'), sep='__', remove=F) %>%
        separate(tmp1, c('percIncorp','percTaxa', 'rep'), sep='/') %>%
        mutate(tmp2 = gsub('.[xy]$', '', tmp2)) %>%
        separate(tmp2, c('BD.min', 'BD.max'), sep='-') %>%
        mutate(BD.min = as.numeric(BD.min),
               BD.max = as.numeric(BD.max),
               BD.mid = mapply(mid, BD.min, BD.max),   
               BD.bin = cut(BD.mid, breaks=BD.range)) %>%
        unite(groups, library, BD.bin, sep='__')
    return(meta)
    }
    

edit.ellipse = function(elps){
    # midpoint function
    mid = function(x, y){ (x + y)/2 }

    elps.tbl = elps %>% summary %>% t %>% as.data.frame
    elps.tbl$meta = rownames(elps.tbl)
    elps.tbl = elps.tbl %>%
        separate(meta, c('library','BD.range'), sep='__') %>%
        mutate(BD.range = gsub('^.', '', BD.range),
               BD.range = gsub('.$', '', BD.range)) %>%
        separate(BD.range, c('BD.min','BD.max'), sep=',', remove=F) %>%
        mutate(
            BD.min = as.numeric(BD.min),
            BD.max = as.numeric(BD.max),
            BD.mid = mapply(mid, BD.min, BD.max))
    }

In [166]:
%%R

# wrapper function
get.ellipses = function(x){
    dir = x['dirs']
    
    # test run of functions
    files = get.files(dir, base.path=inFileDir, pattern='OTU_n2_abs1e9_sub-norm_filt.physeq')
    otu.tbls = get.otu.tbls(files)
    otu.tbl = join.otu.tbls(otu.tbls)
    # making metadata
    meta = make.meta(otu.tbl, BD.range=seq(1.6, 2, 0.002))
    ord = metaMDS(otu.tbl %>% t)
    # getting ellipse
    plot(ord, type = "p", display='sites')
    elps = ordiellipse(ord, groups=meta$groups, kind="se", conf=0.95, lwd=2, col="blue")
    # edit ellipse file
    elps.tbl = edit.ellipse(elps)
    elps.tbl$percIncorp = x['percIncorp'] %>% as.vector
    elps.tbl$percTaxa = x['percTaxa'] %>% as.vector
    
    return(elps.tbl)
    }

Applying functions to all replicates of each set of params


In [167]:
%%R
elps.tbls = apply(tbl, 1, get.ellipses)


Square root transformation
Wisconsin double standardization
Run 0 stress 0.1790159 
Run 1 stress 0.1826813 
Run 2 stress 0.1905444 
Run 3 stress 0.1857775 
Run 4 stress 0.1870961 
Run 5 stress 0.1816663 
Run 6 stress 0.1859556 
Run 7 stress 0.1891756 
Run 8 stress 0.1913628 
Run 9 stress 0.1852682 
Run 10 stress 0.1815447 
Run 11 stress 0.1896646 
Run 12 stress 0.1829106 
Run 13 stress 0.1845297 
Run 14 stress 0.1881721 
Run 15 stress 0.1872477 
Run 16 stress 0.1833416 
Run 17 stress 0.1833737 
Run 18 stress 0.1809264 
Run 19 stress 0.1880924 
Run 20 stress 0.1904182 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.1926144 
Run 1 stress 0.4189866 
Run 2 stress 0.199258 
Run 3 stress 0.1968785 
Run 4 stress 0.1984816 
Run 5 stress 0.2010789 
Run 6 stress 0.1961535 
Run 7 stress 0.1987694 
Run 8 stress 0.1971745 
Run 9 stress 0.1940195 
Run 10 stress 0.2007577 
Run 11 stress 0.1949308 
Run 12 stress 0.19829 
Run 13 stress 0.1951333 
Run 14 stress 0.1945277 
Run 15 stress 0.195445 
Run 16 stress 0.197589 
Run 17 stress 0.1958001 
Run 18 stress 0.1977573 
Run 19 stress 0.1957068 
Run 20 stress 0.2023255 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2000706 
Run 1 stress 0.2026263 
Run 2 stress 0.2023299 
Run 3 stress 0.2012189 
Run 4 stress 0.2002259 
... procrustes: rmse 0.0251287  max resid 0.1147409 
Run 5 stress 0.2013633 
Run 6 stress 0.2016688 
Run 7 stress 0.1999629 
... New best solution
... procrustes: rmse 0.02605301  max resid 0.1418685 
Run 8 stress 0.201575 
Run 9 stress 0.2018953 
Run 10 stress 0.2022682 
Run 11 stress 0.2009635 
Run 12 stress 0.2000142 
... procrustes: rmse 0.02785366  max resid 0.1082478 
Run 13 stress 0.2044475 
Run 14 stress 0.2036841 
Run 15 stress 0.4189013 
Run 16 stress 0.1983235 
... New best solution
... procrustes: rmse 0.02437434  max resid 0.08328625 
Run 17 stress 0.2011757 
Run 18 stress 0.2025951 
Run 19 stress 0.2020738 
Run 20 stress 0.1998653 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2011797 
Run 1 stress 0.2027306 
Run 2 stress 0.2041396 
Run 3 stress 0.2081081 
Run 4 stress 0.2015614 
... procrustes: rmse 0.02029456  max resid 0.1258044 
Run 5 stress 0.2018227 
Run 6 stress 0.2031517 
Run 7 stress 0.2014601 
... procrustes: rmse 0.01986691  max resid 0.1193619 
Run 8 stress 0.2034117 
Run 9 stress 0.2031509 
Run 10 stress 0.2025818 
Run 11 stress 0.2050643 
Run 12 stress 0.2020343 
Run 13 stress 0.2018375 
Run 14 stress 0.2048858 
Run 15 stress 0.418996 
Run 16 stress 0.2017167 
Run 17 stress 0.2051474 
Run 18 stress 0.2024395 
Run 19 stress 0.2026861 
Run 20 stress 0.2013772 
... procrustes: rmse 0.01503062  max resid 0.1222977 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2778789 
Run 1 stress 0.2683635 
... New best solution
... procrustes: rmse 0.03920218  max resid 0.1557902 
Run 2 stress 0.2762629 
Run 3 stress 0.2660155 
... New best solution
... procrustes: rmse 0.03468573  max resid 0.1732854 
Run 4 stress 0.2746118 
Run 5 stress 0.2665009 
... procrustes: rmse 0.03504724  max resid 0.1868679 
Run 6 stress 0.2639792 
... New best solution
... procrustes: rmse 0.03573549  max resid 0.1869908 
Run 7 stress 0.2676936 
Run 8 stress 0.2683389 
Run 9 stress 0.2643395 
... procrustes: rmse 0.0312064  max resid 0.2026738 
Run 10 stress 0.2701555 
Run 11 stress 0.2644202 
... procrustes: rmse 0.03497134  max resid 0.1904078 
Run 12 stress 0.2655966 
Run 13 stress 0.2610281 
... New best solution
... procrustes: rmse 0.03342108  max resid 0.1920624 
Run 14 stress 0.2694683 
Run 15 stress 0.2720143 
Run 16 stress 0.2628386 
Run 17 stress 0.2681965 
Run 18 stress 0.2710333 
Run 19 stress 0.2742105 
Run 20 stress 0.2738975 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2245121 
Run 1 stress 0.2275129 
Run 2 stress 0.2260413 
Run 3 stress 0.2252968 
Run 4 stress 0.2236913 
... New best solution
... procrustes: rmse 0.03095977  max resid 0.1865105 
Run 5 stress 0.2284704 
Run 6 stress 0.2250129 
Run 7 stress 0.2254845 
Run 8 stress 0.2279135 
Run 9 stress 0.2275989 
Run 10 stress 0.2235594 
... New best solution
... procrustes: rmse 0.03323906  max resid 0.1646685 
Run 11 stress 0.2252045 
Run 12 stress 0.2256815 
Run 13 stress 0.2279042 
Run 14 stress 0.2254078 
Run 15 stress 0.2270166 
Run 16 stress 0.2270669 
Run 17 stress 0.2276757 
Run 18 stress 0.2287243 
Run 19 stress 0.2297979 
Run 20 stress 0.2315754 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.307817 
Run 1 stress 0.3077268 
... New best solution
... procrustes: rmse 0.03147815  max resid 0.1535973 
Run 2 stress 0.3099218 
Run 3 stress 0.3024218 
... New best solution
... procrustes: rmse 0.04063426  max resid 0.1398664 
Run 4 stress 0.308722 
Run 5 stress 0.3045955 
Run 6 stress 0.3141494 
Run 7 stress 0.302488 
... procrustes: rmse 0.02947025  max resid 0.1491228 
Run 8 stress 0.3070045 
Run 9 stress 0.2991939 
... New best solution
... procrustes: rmse 0.03531699  max resid 0.1542411 
Run 10 stress 0.3089018 
Run 11 stress 0.305673 
Run 12 stress 0.3032504 
Run 13 stress 0.3054579 
Run 14 stress 0.3080623 
Run 15 stress 0.3104215 
Run 16 stress 0.3117268 
Run 17 stress 0.3048184 
Run 18 stress 0.2971505 
... New best solution
... procrustes: rmse 0.02857545  max resid 0.1645126 
Run 19 stress 0.3142845 
Run 20 stress 0.2979478 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2977684 
Run 1 stress 0.3152073 
Run 2 stress 0.3059951 
Run 3 stress 0.3055432 
Run 4 stress 0.3021358 
Run 5 stress 0.317438 
Run 6 stress 0.3133509 
Run 7 stress 0.314594 
Run 8 stress 0.3144994 
Run 9 stress 0.3091444 
Run 10 stress 0.3062549 
Run 11 stress 0.3108559 
Run 12 stress 0.3182821 
Run 13 stress 0.3127271 
Run 14 stress 0.3117231 
Run 15 stress 0.3134472 
Run 16 stress 0.3176463 
Run 17 stress 0.4189508 
Run 18 stress 0.3169414 
Run 19 stress 0.3097598 
Run 20 stress 0.4189585 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2175042 
Run 1 stress 0.2171887 
... New best solution
... procrustes: rmse 0.02458901  max resid 0.1398951 
Run 2 stress 0.2201059 
Run 3 stress 0.2250702 
Run 4 stress 0.2199631 
Run 5 stress 0.2179218 
Run 6 stress 0.2229787 
Run 7 stress 0.2165832 
... New best solution
... procrustes: rmse 0.01676595  max resid 0.1245146 
Run 8 stress 0.2218961 
Run 9 stress 0.217447 
Run 10 stress 0.2211216 
Run 11 stress 0.2183106 
Run 12 stress 0.2182699 
Run 13 stress 0.2194017 
Run 14 stress 0.2171168 
Run 15 stress 0.2214954 
Run 16 stress 0.2207491 
Run 17 stress 0.2174462 
Run 18 stress 0.2185349 
Run 19 stress 0.2218827 
Run 20 stress 0.2179202 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2030314 
Run 1 stress 0.2027835 
... New best solution
... procrustes: rmse 0.01872686  max resid 0.1013906 
Run 2 stress 0.201681 
... New best solution
... procrustes: rmse 0.02833396  max resid 0.1375546 
Run 3 stress 0.2020347 
... procrustes: rmse 0.01107319  max resid 0.1180968 
Run 4 stress 0.2041156 
Run 5 stress 0.204162 
Run 6 stress 0.2029962 
Run 7 stress 0.2024077 
Run 8 stress 0.2026982 
Run 9 stress 0.2046391 
Run 10 stress 0.2022102 
Run 11 stress 0.2015739 
... New best solution
... procrustes: rmse 0.02336402  max resid 0.1434586 
Run 12 stress 0.2014856 
... New best solution
... procrustes: rmse 0.009647457  max resid 0.09737341 
Run 13 stress 0.2050764 
Run 14 stress 0.2026347 
Run 15 stress 0.4189965 
Run 16 stress 0.2008764 
... New best solution
... procrustes: rmse 0.01165942  max resid 0.1113377 
Run 17 stress 0.2037579 
Run 18 stress 0.2024411 
Run 19 stress 0.2040616 
Run 20 stress 0.2022927 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2181834 
Run 1 stress 0.2245929 
Run 2 stress 0.2212309 
Run 3 stress 0.2207496 
Run 4 stress 0.2198986 
Run 5 stress 0.2213678 
Run 6 stress 0.2173055 
... New best solution
... procrustes: rmse 0.009518584  max resid 0.08241352 
Run 7 stress 0.2193386 
Run 8 stress 0.2256982 
Run 9 stress 0.418944 
Run 10 stress 0.2181331 
Run 11 stress 0.2199544 
Run 12 stress 0.4189499 
Run 13 stress 0.2241071 
Run 14 stress 0.2206238 
Run 15 stress 0.2261741 
Run 16 stress 0.2202512 
Run 17 stress 0.2255324 
Run 18 stress 0.2170376 
... New best solution
... procrustes: rmse 0.00725791  max resid 0.08386704 
Run 19 stress 0.2309696 
Run 20 stress 0.2213828 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.213505 
Run 1 stress 0.2146395 
Run 2 stress 0.2158451 
Run 3 stress 0.4188531 
Run 4 stress 0.2162048 
Run 5 stress 0.2143637 
Run 6 stress 0.2129681 
... New best solution
... procrustes: rmse 0.01970531  max resid 0.1414625 
Run 7 stress 0.2169088 
Run 8 stress 0.217129 
Run 9 stress 0.214608 
Run 10 stress 0.2155086 
Run 11 stress 0.2130278 
... procrustes: rmse 0.01639995  max resid 0.143761 
Run 12 stress 0.2154133 
Run 13 stress 0.2163192 
Run 14 stress 0.2136369 
Run 15 stress 0.2140566 
Run 16 stress 0.2201632 
Run 17 stress 0.2168475 
Run 18 stress 0.2132374 
... procrustes: rmse 0.01898704  max resid 0.1427878 
Run 19 stress 0.2160973 
Run 20 stress 0.2151396 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2445091 
Run 1 stress 0.2390363 
... New best solution
... procrustes: rmse 0.03618717  max resid 0.1622581 
Run 2 stress 0.2390225 
... New best solution
... procrustes: rmse 0.02675696  max resid 0.1811692 
Run 3 stress 0.244366 
Run 4 stress 0.2469441 
Run 5 stress 0.2464644 
Run 6 stress 0.2423661 
Run 7 stress 0.2444409 
Run 8 stress 0.2438853 
Run 9 stress 0.2396306 
Run 10 stress 0.2444932 
Run 11 stress 0.2460162 
Run 12 stress 0.2418131 
Run 13 stress 0.2435337 
Run 14 stress 0.2457756 
Run 15 stress 0.2461296 
Run 16 stress 0.2424575 
Run 17 stress 0.2435973 
Run 18 stress 0.2439488 
Run 19 stress 0.2367106 
... New best solution
... procrustes: rmse 0.02716894  max resid 0.1589136 
Run 20 stress 0.2435418 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2126107 
Run 1 stress 0.2166608 
Run 2 stress 0.2161175 
Run 3 stress 0.2177959 
Run 4 stress 0.2131232 
Run 5 stress 0.2118112 
... New best solution
... procrustes: rmse 0.01932961  max resid 0.1604795 
Run 6 stress 0.2143053 
Run 7 stress 0.2172715 
Run 8 stress 0.213649 
Run 9 stress 0.2155582 
Run 10 stress 0.2259611 
Run 11 stress 0.2176317 
Run 12 stress 0.2156092 
Run 13 stress 0.2216555 
Run 14 stress 0.2151244 
Run 15 stress 0.2134828 
Run 16 stress 0.2183714 
Run 17 stress 0.2150234 
Run 18 stress 0.2144987 
Run 19 stress 0.2168899 
Run 20 stress 0.2116765 
... New best solution
... procrustes: rmse 0.00721256  max resid 0.09556044 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.2625669 
Run 1 stress 0.2638941 
Run 2 stress 0.2644776 
Run 3 stress 0.2699065 
Run 4 stress 0.266605 
Run 5 stress 0.2678898 
Run 6 stress 0.2665244 
Run 7 stress 0.2616436 
... New best solution
... procrustes: rmse 0.02230922  max resid 0.1555541 
Run 8 stress 0.2608355 
... New best solution
... procrustes: rmse 0.02048264  max resid 0.1047469 
Run 9 stress 0.2712026 
Run 10 stress 0.2620757 
Run 11 stress 0.2667268 
Run 12 stress 0.2623281 
Run 13 stress 0.2688867 
Run 14 stress 0.2685515 
Run 15 stress 0.265638 
Run 16 stress 0.2709352 
Run 17 stress 0.2675698 
Run 18 stress 0.2687303 
Run 19 stress 0.2654582 
Run 20 stress 0.273199 
Square root transformation
Wisconsin double standardization
Run 0 stress 0.255568 
Run 1 stress 0.2606083 
Run 2 stress 0.2636984 
Run 3 stress 0.2562182 
Run 4 stress 0.2650849 
Run 5 stress 0.2614314 
Run 6 stress 0.2554093 
... New best solution
... procrustes: rmse 0.02145027  max resid 0.1044858 
Run 7 stress 0.2555421 
... procrustes: rmse 0.02356887  max resid 0.1186675 
Run 8 stress 0.2656055 
Run 9 stress 0.2586464 
Run 10 stress 0.2587184 
Run 11 stress 0.2597864 
Run 12 stress 0.2665255 
Run 13 stress 0.2650768 
Run 14 stress 0.2549842 
... New best solution
... procrustes: rmse 0.02163776  max resid 0.1025825 
Run 15 stress 0.2622467 
Run 16 stress 0.2586789 
Run 17 stress 0.2571542 
Run 18 stress 0.258006 
Run 19 stress 0.2555882 
Run 20 stress 0.2629609 

In [168]:
%%R

tbl.c = do.call(rbind, elps.tbls)
tbl.c$percIncorp = factor(tbl.c$percIncorp, levels=sort(unique(as.numeric(tbl.c$percIncorp))))
tbl.c$percTaxa = factor(tbl.c$percTaxa, levels=sort(unique(as.numeric(tbl.c$percTaxa))))
tbl.c %>% head


       NMDS1       NMDS2       Area library    BD.range BD.min BD.max BD.mid
1 -1.5641348  0.27275313 0.65747769       1  1.708,1.71  1.708  1.710  1.709
2 -1.4843924 -0.04233845 0.16861984       1  1.71,1.712  1.710  1.712  1.711
3 -1.3825058  0.12660595 0.04950627       1 1.712,1.714  1.712  1.714  1.713
4 -0.8233783  0.03392720 0.11764136       1 1.714,1.716  1.714  1.716  1.715
5 -0.7456088 -0.07559484 0.07260848       1 1.716,1.718  1.716  1.718  1.717
6 -0.3917073 -0.03839674 0.08640746       1  1.718,1.72  1.718  1.720  1.719
  percIncorp percTaxa
1          0       10
2          0       10
3          0       10
4          0       10
5          0       10
6          0       10

In [169]:
%%R -w 1000 -h 800

ggplot(tbl.c, aes(NMDS1, NMDS2, color=library, size=BD.mid)) +
    geom_point(alpha=0.5) +
    geom_point(color='black', shape='O', alpha=0.6) +
    scale_size_continuous(range=c(2,6)) +
    facet_grid(percTaxa ~ percIncorp, scales='free') +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )



In [170]:
%%R -w 1000 -h 300

tbl.c.f = tbl.c %>%
    filter(percIncorp == 100)

ggplot(tbl.c.f, aes(NMDS1, NMDS2, color=library, size=BD.mid)) +
    geom_point(alpha=0.5) +
    geom_point(color='black', shape='O', alpha=0.6) +
    scale_size_continuous(range=c(2,6)) +
    facet_grid(. ~ percTaxa, scales='free') +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )


Indepedent plots


In [164]:
%%R -w 1000 -h 250

tbl.c.f = tbl.c %>%
    filter(percIncorp == 100)

percTaxa.u = tbl.c.f$percTaxa %>% unique %>% sort


make_plot = function(tbl, percTaxa.cut, title=''){
    tbl.f = filter(tbl, percTaxa == percTaxa.cut)
    
    p = ggplot(tbl.f, aes(NMDS1, NMDS2, color=library, size=BD.mid)) +
        geom_point(alpha=0.5) +
        geom_point(color='black', shape='O', alpha=0.6) +
        scale_size_continuous(range=c(2,6)) +
        labs(title=title) +
        theme_bw() +
        theme(
            text = element_text(size=16),
            axis.title.x = element_blank(),
            axis.title.y = element_blank(),
            legend.position = 'none'
            
            )
    return(p)
    }


p1 = make_plot(tbl.c.f, percTaxa.u[1], title=percTaxa.u[1])
p2 = make_plot(tbl.c.f, percTaxa.u[2], title=percTaxa.u[2])
p3 = make_plot(tbl.c.f, percTaxa.u[3], title=percTaxa.u[3])
p4 = make_plot(tbl.c.f, percTaxa.u[4], title=percTaxa.u[4])

grid.arrange(p1,p2,p3,p4,nrow=1)



In [ ]:



OLD


In [52]:
%%R
# test run of functions
files = get.files(tbl[5,'dirs'], base.path=inFileDir, pattern='OTU_n2_abs1e9_sub-norm_filt.physeq')
otu.tbls = get.otu.tbls(files)
otu.tbl = join.otu.tbls(otu.tbls)
# making metadata
meta = make.meta(otu.tbl)
ord = metaMDS(otu.tbl %>% t)
# getting ellipse
plot(ord, type = "p", display='sites')
elps = ordiellipse(ord, groups=meta$groups, kind="se", conf=0.95, lwd=2, col="blue")
# edit ellipse file
elps.tbl = edit.ellipse(elps)


Square root transformation
Wisconsin double standardization
Run 0 stress 0.2778789 
Run 1 stress 0.2682116 
... New best solution
... procrustes: rmse 0.03657902  max resid 0.1814301 
Run 2 stress 0.2737173 
Run 3 stress 0.2729629 
Run 4 stress 0.2663702 
... New best solution
... procrustes: rmse 0.03717252  max resid 0.1773461 
Run 5 stress 0.2716407 
Run 6 stress 0.2736374 
Run 7 stress 0.270208 
Run 8 stress 0.2658811 
... New best solution
... procrustes: rmse 0.03336031  max resid 0.1934285 
Run 9 stress 0.2722786 
Run 10 stress 0.2743965 
Run 11 stress 0.2713463 
Run 12 stress 0.2723356 
Run 13 stress 0.2720634 
Run 14 stress 0.2739934 
Run 15 stress 0.2687192 
Run 16 stress 0.2745111 
Run 17 stress 0.2683134 
Run 18 stress 0.2704547 
Run 19 stress 0.2659728 
... procrustes: rmse 0.03626291  max resid 0.1879366 
Run 20 stress 0.2688132 

In [44]:
%%R
# ordination of combined OTU table
ord = metaMDS(otu.tbl %>% t)


Square root transformation
Wisconsin double standardization
Run 0 stress 0.2778789 
Run 1 stress 0.2740874 
... New best solution
... procrustes: rmse 0.03909948  max resid 0.1566447 
Run 2 stress 0.263295 
... New best solution
... procrustes: rmse 0.03815336  max resid 0.1820028 
Run 3 stress 0.2622787 
... New best solution
... procrustes: rmse 0.03482414  max resid 0.1935661 
Run 4 stress 0.2689901 
Run 5 stress 0.276454 
Run 6 stress 0.2656546 
Run 7 stress 0.2773779 
Run 8 stress 0.2704998 
Run 9 stress 0.2619899 
... New best solution
... procrustes: rmse 0.0354443  max resid 0.1840365 
Run 10 stress 0.2761005 
Run 11 stress 0.270011 
Run 12 stress 0.2707878 
Run 13 stress 0.2671646 
Run 14 stress 0.269292 
Run 15 stress 0.272816 
Run 16 stress 0.2779202 
Run 17 stress 0.2729177 
Run 18 stress 0.2730189 
Run 19 stress 0.2753001 
Run 20 stress 0.2696346 

In [45]:
%%R

plot(ord, type = "p", display='sites')
elps = ordiellipse(ord, groups=meta$groups, kind="se", conf=0.95, lwd=2, col="blue")



In [46]:
%%R

# midpoint function
mid = function(x, y){ (x + y)/2 }

elps.tbl = elps %>% summary %>% t %>% as.data.frame
elps.tbl$meta = rownames(elps.tbl)
elps.tbl = elps.tbl %>%
    separate(meta, c('library','BD.range'), sep='__') %>%
    mutate(BD.range = gsub('^.', '', BD.range),
           BD.range = gsub('.$', '', BD.range)) %>%
    separate(BD.range, c('BD.min','BD.max'), sep=',', remove=F) %>%
    mutate(
        BD.min = as.numeric(BD.min),
        BD.max = as.numeric(BD.max),
        BD.mid = mapply(mid, BD.min, BD.max))
    
elps.tbl %>% head


        NMDS1        NMDS2       Area library    BD.range BD.min BD.max BD.mid
1 -0.64078722 -0.064522299 0.01671614       1 1.708,1.712  1.708  1.712  1.710
2 -0.43401295 -0.052903995 0.01684263       1 1.712,1.716  1.712  1.716  1.714
3 -0.26978814 -0.013581894 0.01052575       1  1.716,1.72  1.716  1.720  1.718
4 -0.09839450 -0.009202702 0.01019477       1  1.72,1.724  1.720  1.724  1.722
5  0.03367155 -0.063996674 0.01297468       1 1.724,1.728  1.724  1.728  1.726
6  0.16095586 -0.010542351 0.01396157       1 1.728,1.732  1.728  1.732  1.730

In [47]:
%%R -h 400

ggplot(elps.tbl, aes(NMDS1, NMDS2, color=library, size=BD.mid)) +
    geom_point() +
    scale_size_continuous(range=c(2,7)) +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )



In [ ]:


OLD


In [22]:
%%R -i inFileDir

files = list.files(inFileDir, pattern='OTU_n2_abs1e9_sub-norm_filt.physeq', recursive=T) %>% as.data.frame
colnames(files) = c('files')
files %>% head


                                       files
1 0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq
2 0/10/11/OTU_n2_abs1e9_sub-norm_filt.physeq
3 0/10/12/OTU_n2_abs1e9_sub-norm_filt.physeq
4 0/10/13/OTU_n2_abs1e9_sub-norm_filt.physeq
5 0/10/14/OTU_n2_abs1e9_sub-norm_filt.physeq
6 0/10/15/OTU_n2_abs1e9_sub-norm_filt.physeq

In [24]:
%%R
# file metadata
percTaxa.to.keep = c(1,10,25,50)

tbl = files %>%
    separate(files, c('percIncorp','percTaxa','rep','file'), sep='/', remove=F) %>%
    filter(percIncorp == 100, percTaxa %in% percTaxa.to.keep)
tbl %>% nrow %>% print
tbl %>% head


[1] 80
                                         files percIncorp percTaxa rep
1 100/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq        100       10  10
2 100/10/11/OTU_n2_abs1e9_sub-norm_filt.physeq        100       10  11
3 100/10/12/OTU_n2_abs1e9_sub-norm_filt.physeq        100       10  12
4 100/10/13/OTU_n2_abs1e9_sub-norm_filt.physeq        100       10  13
5 100/10/14/OTU_n2_abs1e9_sub-norm_filt.physeq        100       10  14
6 100/10/15/OTU_n2_abs1e9_sub-norm_filt.physeq        100       10  15
                                file
1 OTU_n2_abs1e9_sub-norm_filt.physeq
2 OTU_n2_abs1e9_sub-norm_filt.physeq
3 OTU_n2_abs1e9_sub-norm_filt.physeq
4 OTU_n2_abs1e9_sub-norm_filt.physeq
5 OTU_n2_abs1e9_sub-norm_filt.physeq
6 OTU_n2_abs1e9_sub-norm_filt.physeq

In [25]:
%%R -i workDir

tbl.l = list()
for(i in tbl$files){
    p = paste(c(workDir, 'atomIncorp_taxaIncorp', i), collapse='/')
    ord = readRDS(p)
    
    id = gsub('/[^/]+$', '', i)
    id = gsub('/', '__', id)
    id = paste(c(id,'__'), collapse='')
    otu.tbl = ord %>% otu_table %>% as.data.frame
    colnames(otu.tbl) = gsub('^', id, colnames(otu.tbl))
    otu.tbl$taxon = rownames(otu.tbl)
    tbl.l[[i]] = otu.tbl
    
    #    otu.tbl %>% head %>% print; break
    
    #samp.data = tbl.l[[i]] %>% sample_data %>% as.data.frame
    #samp.data$file = i
    #sample_data(tbl.l[[i]]) = samp.data
    
    
    #ord = ordinate(physeq, method='NMDS', distance='bray')
    #tbl.l[[i]] = ord
    }

exmp = tbl.l[[names(tbl.l)[1]]]

In [10]:
%%R
# joining all OTU tables

n = names(tbl.l)

tbl = tbl.l[[n[1]]]
for (i in seq(2:length(n))){
    n.i = n[i]
    tbl = left_join(tbl, tbl.l[[n.i]], c('taxon'='taxon'))
    }

# edit table
tbl[is.na(tbl)] = 0
rownames(tbl) = tbl$taxon
tbl$taxon = NULL

In [11]:
%%R
tbl %>% ncol %>% print
tbl %>% nrow %>% print
tbl[1:5,1:5]


[1] 8062
[1] 484
                                     0__10__10__1__1.710-1.711.x
Acaryochloris_marina_MBIC11017                               211
Acetobacter_pasteurianus_IFO_3283-12                         199
Acetohalobium_arabaticum_DSM_5501                              0
Achromobacter_xylosoxidans_A8                                  2
Acidaminococcus_fermentans_DSM_20731                          14
                                     0__10__10__1__1.711-1.714.x
Acaryochloris_marina_MBIC11017                                82
Acetobacter_pasteurianus_IFO_3283-12                         229
Acetohalobium_arabaticum_DSM_5501                              0
Achromobacter_xylosoxidans_A8                                  1
Acidaminococcus_fermentans_DSM_20731                          18
                                     0__10__10__1__1.714-1.719.x
Acaryochloris_marina_MBIC11017                                 5
Acetobacter_pasteurianus_IFO_3283-12                          62
Acetohalobium_arabaticum_DSM_5501                              0
Achromobacter_xylosoxidans_A8                                  2
Acidaminococcus_fermentans_DSM_20731                          23
                                     0__10__10__1__1.719-1.725.x
Acaryochloris_marina_MBIC11017                                 0
Acetobacter_pasteurianus_IFO_3283-12                          11
Acetohalobium_arabaticum_DSM_5501                              0
Achromobacter_xylosoxidans_A8                                 23
Acidaminococcus_fermentans_DSM_20731                           9
                                     0__10__10__1__1.725-1.732.x
Acaryochloris_marina_MBIC11017                                 1
Acetobacter_pasteurianus_IFO_3283-12                           0
Acetohalobium_arabaticum_DSM_5501                              0
Achromobacter_xylosoxidans_A8                                 40
Acidaminococcus_fermentans_DSM_20731                           0

In [12]:
%%R
# creating metadata table

meta = tbl %>% colnames %>% as.data.frame 
colnames(meta) = c('X')
meta = filter(meta, X != 'taxon')

# midpoint function
mid = function(x, y){ (x + y)/2 }

# making fixed BD-range
BD.range = seq(1.6, 1.9, 0.004)

# edit metadata
meta = meta %>%
    separate(X, c('percIncorp','percTaxa','rep','library','BD_range'), sep='__', remove=F) %>%
    mutate(BD_range = gsub('.x$', '', BD_range)) %>%
    separate(BD_range, c('BD.min', 'BD.max'), sep='-') %>%
    mutate(BD.min = as.numeric(BD.min),
           BD.max = as.numeric(BD.max),
           BD.mid = mapply(mid, BD.min, BD.max),   
           BD.bin = cut(BD.mid, breaks=BD.range))

meta %>% head


                            X percIncorp percTaxa rep library BD.min BD.max
1 0__10__10__1__1.710-1.711.x          0       10  10       1  1.710  1.711
2 0__10__10__1__1.711-1.714.x          0       10  10       1  1.711  1.714
3 0__10__10__1__1.714-1.719.x          0       10  10       1  1.714  1.719
4 0__10__10__1__1.719-1.725.x          0       10  10       1  1.719  1.725
5 0__10__10__1__1.725-1.732.x          0       10  10       1  1.725  1.732
6 0__10__10__1__1.732-1.737.x          0       10  10       1  1.732  1.737
  BD.mid        BD.bin
1 1.7105 (1.708,1.712]
2 1.7125 (1.712,1.716]
3 1.7165  (1.716,1.72]
4 1.7220  (1.72,1.724]
5 1.7285 (1.728,1.732]
6 1.7345 (1.732,1.736]

In [14]:
%%R
# test table

tbl.test = tbl[,1:100]

In [15]:
%%R
# ordination of combined OTU table

ord = metaMDS(tbl.test %>% t)
## Start from previous best solution
ord = metaMDS(tbl.test %>% t, previous.best = ord)


Square root transformation
Wisconsin double standardization
Run 0 stress 0.1623307 
Run 1 stress 0.1731224 
Run 2 stress 0.165822 
Run 3 stress 0.1716225 
Run 4 stress 0.1705758 
Run 5 stress 0.1650397 
Run 6 stress 0.1653182 
Run 7 stress 0.1623307 
... procrustes: rmse 1.117674e-05  max resid 5.566491e-05 
*** Solution reached
Square root transformation
Wisconsin double standardization
Starting from 2-dimensional configuration
Run 0 stress 0.1623307 
Run 1 stress 0.162311 
... New best solution
... procrustes: rmse 0.002547498  max resid 0.01668618 
Run 2 stress 0.1658162 
Run 3 stress 0.1890312 
Run 4 stress 0.1740537 
Run 5 stress 0.1856413 
Run 6 stress 0.1888861 
Run 7 stress 0.162311 
... New best solution
... procrustes: rmse 1.096711e-05  max resid 4.151778e-05 
*** Solution reached

In [16]:
%%R

plot(ord, type = "p", display='sites')
elps = ordiellipse(ord, groups=meta$BD.bin, kind="se", conf=0.95, lwd=2, col="blue")


Error in pts[gr, ] : subscript out of bounds
In addition: Warning messages:
1: In mutate_impl(.data, dots) : NAs introduced by coercion
2: In complete.cases(pts) & !is.na(groups) :
  longer object length is not a multiple of shorter object length
Error in pts[gr, ] : subscript out of bounds

In [17]:
%%R

elps = elps %>% summary %>% t %>% as.data.frame
elps


Error in eval(expr, envir, enclos) : object 'elps' not found

In [18]:
%%R

ggplot(elps, aes(NMDS1, NMDS2)) +
    geom_point()


Error in ggplot(elps, aes(NMDS1, NMDS2)) : object 'elps' not found

WAITING


OLD


In [40]:
%%R
# getting list of BD-value tables

# making fixed BD-range
BD.range = seq(1.6, 1.9, 0.004)

# midpoint function
mid = function(x, y){ (x + y)/2 }

# defining BD range & binning
get.BD.range = function(ord, BD.range){
    tbl = ord %>% scores %>% as.data.frame
    
    # metadata from rowname
    tbl$lib = gsub('__.+', '', rownames(tbl)) %>% as.character
    tbl$BD.start = gsub('.+__([0-9.]+)-.+', '\\1', rownames(tbl)) %>% as.numeric
    tbl$BD.end = gsub('.+-', '', rownames(tbl)) %>% as.numeric
    tbl$BD.mid = mapply(mid, tbl$BD.start, tbl$BD.end)
    
    # binning BD.mid
    tbl$BD.bin = cut(tbl$BD.mid, breaks=BD.range)
    tbl = unite(tbl, group, lib, BD.bin, sep='__')
    
    return(tbl)
    }

tbl.l.BD = lapply(tbl.l, get.BD.range, BD.range=BD.range)

In [41]:
%%R
# combining lists

tbl.l.c = list()
for(f in names(tbl.l)){
    tbl.l.c[[f]][['ord']] = tbl.l[[f]]
    tbl.l.c[[f]][['meta']] = tbl.l.BD[[f]]
    }

In [43]:
%%R

get.ellipse = function(x){
    meta = x$meta    
    ord = x$ord
    
    plot(ord, type = "p", display='sites')
    elps = ordiellipse(ord, meta$group, kind="se", conf=0.95, lwd=2, col="blue")

    elps = elps %>% summary %>% t %>% as.data.frame
    return(elps)
    }

tbl.elps = lapply(tbl.l.c, get.ellipse)



In [ ]:


In [39]:
%%R

tbl = do.call(rbind, tbl.elps)
tbl %>% head


                                                              NMDS1
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.708,1.712] -1.4922898
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.712,1.716] -1.0738806
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.716,1.72]  -0.6329284
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.72,1.724]  -0.2077575
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.728,1.732]  0.3839594
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.732,1.736]  0.7478332
                                                                NMDS2
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.708,1.712]  0.144040255
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.712,1.716] -0.071715299
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.716,1.72]  -0.056693800
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.72,1.724]  -0.026088529
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.728,1.732] -0.006658269
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.732,1.736] -0.025774021
                                                                 Area
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.708,1.712]          NaN
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.712,1.716] 2.940128e-09
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.716,1.72]  7.165913e-11
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.72,1.724]  2.379718e-12
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.728,1.732] 0.000000e+00
0/10/10/OTU_n2_abs1e9_sub-norm_filt.physeq.(1.732,1.736]          NaN

In [ ]: