In [ ]:
    
import pandas as pd
import numpy as np
    
Note: this notebook requires operating on the original UniFrac distance matrices, which are very large.
To reproduce, download the following files from ftp://ftp.microbio.me/emp/release1/results/beta_diversity/deblur/:
emp_150_gg_1k_unweighted_unifrac.txt
emp_90_gg_1k_unweighted_unifrac.txt
In [34]:
    
# read in samples
def _read_matrix_sample_ids(matrix_fp):
    """Return the sample IDs found in the first row of a tab-delimited matrix file.

    Only the first row is parsed (``nrows=1``), so the body of the matrix is
    never loaded. The first field of that row is dropped; it is presumably the
    empty corner cell of the distance matrix -- confirm against the file.
    """
    first_row = pd.read_csv(matrix_fp, sep='\t', header=None, nrows=1)
    # transposing turns the single row into column 0 of a one-column frame
    row_cells = first_row.T[0]
    # skip the leading field and keep everything after it
    return row_cells[1:]


samples_150 = _read_matrix_sample_ids('./emp_150_gg_1k_unweighted_unifrac.txt')
samples_90 = _read_matrix_sample_ids('./emp_90_gg_1k_unweighted_unifrac.txt')
    
In [18]:
    
# read in sample map (tab-delimited, column names taken from the first row)
mapping_fp = '../../data/mapping-files/emp_qiime_mapping_subset_2k.tsv'
subset_2k_map = pd.read_csv(mapping_fp, sep='\t', header=0)
# last expression of the cell: preview the mapping table
subset_2k_map.head()
    
    Out[18]:
  
    
       
      #SampleID 
      BarcodeSequence 
      LinkerPrimerSequence 
      Description 
      host_subject_id 
      study_id 
      title 
      principal_investigator 
      doi 
      ebi_accession 
      ... 
      adiv_shannon 
      adiv_faith_pd 
      temperature_deg_c 
      ph 
      salinity_psu 
      oxygen_mg_per_l 
      phosphate_umol_per_l 
      ammonium_umol_per_l 
      nitrate_umol_per_l 
      sulfate_umol_per_l 
     
  
  
    
      0 
      550.L1S116.s.1.sequence 
      ATGCCTGAGCAG 
      GTGCCAGCMGCCGCGGTAA 
      sample_20 stool 
      F4 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.867414 
      12.457989 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1 
      550.L1S119.s.1.sequence 
      CAGCACTAAGCG 
      GTGCCAGCMGCCGCGGTAA 
      sample_23 stool 
      F4 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.265164 
      10.719448 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      2 
      550.L1S164.s.1.sequence 
      ATGTACGGCGAC 
      GTGCCAGCMGCCGCGGTAA 
      sample_73 stool 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.661124 
      14.214158 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      3 
      550.L1S194.s.1.sequence 
      CGAAGACTGCTG 
      GTGCCAGCMGCCGCGGTAA 
      sample_105 stool 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      4.439943 
      12.012602 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      4 
      550.L1S20.s.1.sequence 
      ACGGTGAGTGTC 
      GTGCCAGCMGCCGCGGTAA 
      sample_112 stool 
      F4 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.327601 
      11.758069 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      5 
      550.L1S26.s.1.sequence 
      ACAGCAGTGGTC 
      GTGCCAGCMGCCGCGGTAA 
      sample_175 stool 
      F4 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.272025 
      10.390166 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      6 
      550.L1S264.s.1.sequence 
      CTGTATCGTATG 
      GTGCCAGCMGCCGCGGTAA 
      sample_180 stool 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      5.732025 
      19.540809 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      7 
      550.L1S273.s.1.sequence 
      CCTAGTACTGAT 
      GTGCCAGCMGCCGCGGTAA 
      sample_189 stool 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      4.191533 
      14.265016 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      8 
      550.L2S103.s.2.sequence 
      AGCACGAGCCTA 
      GTGCCAGCMGCCGCGGTAA 
      sample_380 sebum 
      F4 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      7.570207 
      52.371958 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      9 
      550.L2S144.s.2.sequence 
      AGTACGCTCGAG 
      GTGCCAGCMGCCGCGGTAA 
      sample_425 sebum 
      F4 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      6.181649 
      38.653723 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      10 
      550.L2S164.s.2.sequence 
      ACTACAGCCTAT 
      GTGCCAGCMGCCGCGGTAA 
      sample_447 sebum 
      F4 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      5.828009 
      37.769790 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      11 
      550.L2S166.s.2.sequence 
      AGATCGGCTCGA 
      GTGCCAGCMGCCGCGGTAA 
      sample_449 sebum 
      F4 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      5.952527 
      42.317611 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      12 
      550.L2S303.s.2.sequence 
      CTCATGTACAGT 
      GTGCCAGCMGCCGCGGTAA 
      sample_597 sebum 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      1.868610 
      17.649715 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      13 
      550.L2S377.s.2.sequence 
      CCTCTCGTGATC 
      GTGCCAGCMGCCGCGGTAA 
      sample_676 sebum 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      6.057640 
      35.644789 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      14 
      550.L4S1.s.4.sequence 
      CATGGCTACACA 
      GTGCCAGCMGCCGCGGTAA 
      sample_1111 sebum 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      4.850969 
      34.136518 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      15 
      550.L4S102.s.4.sequence 
      GCAATAGCTGCT 
      GTGCCAGCMGCCGCGGTAA 
      sample_1115 sebum 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      7.066361 
      53.006615 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      16 
      550.L4S122.s.4.sequence 
      GACCACTACGAT 
      GTGCCAGCMGCCGCGGTAA 
      sample_1137 sebum 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      4.212268 
      25.715900 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      17 
      550.L4S140.s.4.sequence 
      GAGTCTGAGTCT 
      GTGCCAGCMGCCGCGGTAA 
      sample_1157 sebum 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      2.630993 
      13.920620 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      18 
      550.L4S158.s.4.sequence 
      GCAGGATAGATA 
      GTGCCAGCMGCCGCGGTAA 
      sample_1176 sebum 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      5.616810 
      30.118912 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      19 
      550.L4S161.s.4.sequence 
      GAAGTCTCGCAT 
      GTGCCAGCMGCCGCGGTAA 
      sample_1180 sebum 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      7.304861 
      55.092754 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      20 
      550.L4S183.s.4.sequence 
      GCGGATGTGACT 
      GTGCCAGCMGCCGCGGTAA 
      sample_1204 sebum 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      4.891649 
      24.036843 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      21 
      550.L5S119.s.5.sequence 
      CAGCACTAAGCG 
      GTGCCAGCMGCCGCGGTAA 
      sample_1403 saliva 
      F4 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.647179 
      9.600555 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      22 
      550.L5S224.s.5.sequence 
      CTGCAGTACTTA 
      GTGCCAGCMGCCGCGGTAA 
      sample_1519 saliva 
      F4 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.869599 
      10.713225 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      23 
      550.L5S258.s.5.sequence 
      CGAGTCTAGTTG 
      GTGCCAGCMGCCGCGGTAA 
      sample_1556 saliva 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.841479 
      10.921526 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      24 
      550.L6S104.s.6.sequence 
      TAGATCCTCGAT 
      GTGCCAGCMGCCGCGGTAA 
      sample_1653 saliva 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      4.329496 
      11.399017 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      25 
      550.L6S144.s.6.sequence 
      TAGCGGATCACG 
      GTGCCAGCMGCCGCGGTAA 
      sample_1695 saliva 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.687083 
      10.642233 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      26 
      550.L6S149.s.6.sequence 
      GTGTCTACATTG 
      GTGCCAGCMGCCGCGGTAA 
      sample_1700 saliva 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      4.226336 
      12.062299 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      27 
      550.L6S176.s.6.sequence 
      TAGTCGTCTAGT 
      GTGCCAGCMGCCGCGGTAA 
      sample_1730 saliva 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.991563 
      11.172998 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      28 
      550.L6S186.s.6.sequence 
      GTAGCTGACGCA 
      GTGCCAGCMGCCGCGGTAA 
      sample_1741 saliva 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      4.099716 
      11.555792 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      29 
      550.L6S197.s.6.sequence 
      ACTCACGGTATG 
      GTGCCAGCMGCCGCGGTAA 
      sample_1752 saliva 
      M3 
      550 
      Moving pictures of the human microbiome 
      Rob Knight 
      10.1186/gb-2011-12-5-r50 
      ERP021896 
      ... 
      3.953951 
      10.386101 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      1970 
      2382.GM.181.R4.leav.10.12.lane8.NoIndex.L008.s... 
      CGATGTGTGGTT 
      GTGCCAGCMGCCGCGGTAA 
      Leaves 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      1.335486 
      7.612504 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1971 
      2382.GM.181.R4.gp.10.12.lane8.NoIndex.L008.seq... 
      GATCCTCATGCG 
      GTGCCAGCMGCCGCGGTAA 
      Grapes 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.316466 
      5.129634 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1972 
      2382.GM.181.R3.rhizo.10.12.lane7.NoIndex.L007.... 
      TATCCAAGCGCA 
      GTGCCAGCMGCCGCGGTAA 
      Rhizosphere 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      9.029518 
      113.700721 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1973 
      2382.GM.181.R3.leav.10.12.lane8.NoIndex.L008.s... 
      TCCATCGACGTG 
      GTGCCAGCMGCCGCGGTAA 
      Leaves 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.336742 
      4.742170 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1974 
      2382.GM.181.R3.gp.10.12.lane8.NoIndex.L008.seq... 
      AGACATACCGTA 
      GTGCCAGCMGCCGCGGTAA 
      Grapes 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.121966 
      4.951791 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1975 
      2382.GM.181.R2.root.10.12.lane7.NoIndex.L007.s... 
      GGCATGTTATCG 
      GTGCCAGCMGCCGCGGTAA 
      Roots 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      6.523943 
      48.856413 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1976 
      2382.GM.181.R2.rhizo.10.12.lane7.NoIndex.L007.... 
      ACCAATCTCGGC 
      GTGCCAGCMGCCGCGGTAA 
      Rhizosphere 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      8.553171 
      98.373702 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1977 
      2382.GM.181.R2.leav.10.12.lane8.NoIndex.L008.s... 
      AGCTCTAGAAAC 
      GTGCCAGCMGCCGCGGTAA 
      Leaves 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.031598 
      4.116123 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1978 
      2382.GM.181.R2.gp.10.12.lane8.NoIndex.L008.seq... 
      GATCATTCTCTC 
      GTGCCAGCMGCCGCGGTAA 
      Grapes 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.144597 
      4.259524 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1979 
      2382.GM.181.R1.leav.10.12.lane8.NoIndex.L008.s... 
      CCAGACCGCTAT 
      GTGCCAGCMGCCGCGGTAA 
      Leaves 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      1.098354 
      8.048152 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1980 
      2382.GM.181.R1.gp.10.12.lane8.NoIndex.L008.seq... 
      TCTGAGGTTGCC 
      GTGCCAGCMGCCGCGGTAA 
      Grapes 
      HS4 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.049765 
      4.818847 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1981 
      2382.DPOO1.C1.HA.5.650.gp.9.12.lane8.NoIndex.L... 
      ACCTTACACCTT 
      GTGCCAGCMGCCGCGGTAA 
      Grapes 
      HS3 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.172636 
      4.406474 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1982 
      2382.DPOO1.C1.HA.5.649.leav.9.12.lane8.NoIndex... 
      AACCATGCCAAC 
      GTGCCAGCMGCCGCGGTAA 
      Leaves 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.020367 
      3.148375 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1983 
      2382.DPOO1.C1.HA.5.648.root.9.12.lane7.NoIndex... 
      GAGGTTCTTGAC 
      GTGCCAGCMGCCGCGGTAA 
      Roots 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      6.035013 
      54.134147 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1984 
      2382.DPOO1.C1.HA.5.448.root.4.12.lane7.NoIndex... 
      AGCCTCATGATG 
      GTGCCAGCMGCCGCGGTAA 
      Roots 
      HS10 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      6.658723 
      61.613640 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1985 
      2382.DPOO1.C1.HA.4.645.gp.9.12.lane8.NoIndex.L... 
      ACTAGCGTTCAG 
      GTGCCAGCMGCCGCGGTAA 
      Grapes 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.047735 
      2.771104 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1986 
      2382.DPOO1.C1.HA.4.644.leav.9.12.lane8.NoIndex... 
      GGTAAGTTTGAC 
      GTGCCAGCMGCCGCGGTAA 
      Leaves 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.018037 
      1.768118 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1987 
      2382.DPOO1.C1.HA.3.640.gp.9.12.lane8.NoIndex.L... 
      CCGAAGATTCTG 
      GTGCCAGCMGCCGCGGTAA 
      Grapes 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.096775 
      4.252410 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1988 
      2382.DPOO1.C1.HA.3.639.leav.9.12.lane8.NoIndex... 
      TGGAATTCGGCT 
      GTGCCAGCMGCCGCGGTAA 
      Leaves 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.047049 
      3.368435 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1989 
      2382.DPOO1.C1.HA.3.32.r1.leav.6.11.lane8.NoInd... 
      TGTATCTTCACC 
      GTGCCAGCMGCCGCGGTAA 
      Leaves 
      HS9 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.040234 
      5.070915 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1990 
      2382.DPOO1.C1.HA.3.238.root.9.11.lane1.NoIndex... 
      ATCGTGTGTTGG 
      GTGCCAGCMGCCGCGGTAA 
      Roots 
      HS10 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      7.417508 
      76.719597 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1991 
      2382.DPOO1.C1.HA.2.635.gp.9.12.lane8.NoIndex.L... 
      GGCGTTGCATTC 
      GTGCCAGCMGCCGCGGTAA 
      Grapes 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.015675 
      2.704737 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1992 
      2382.DPOO1.C1.HA.2.634.leav.9.12.lane8.NoIndex... 
      TCTTCAACTACC 
      GTGCCAGCMGCCGCGGTAA 
      Leaves 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.088071 
      2.056739 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1993 
      2382.DPOO1.C1.HA.2.633.root.9.12.lane7.NoIndex... 
      ATGTCACCGCTG 
      GTGCCAGCMGCCGCGGTAA 
      Roots 
      HS1 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      7.351779 
      75.716918 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1994 
      2382.DPOO1.C1.HA.2.433.root.4.12.lane7.NoIndex... 
      CCAAACTCGTCG 
      GTGCCAGCMGCCGCGGTAA 
      Roots 
      HS10 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      7.070601 
      69.265327 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1995 
      2382.DPOO1.C1.HA.1.630.gp.9.12.lane8.NoIndex.L... 
      TTGCGACAAAGT 
      GTGCCAGCMGCCGCGGTAA 
      Grapes 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.092204 
      3.604761 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1996 
      2382.DPOO1.C1.HA.1.629.leav.9.12.lane8.NoIndex... 
      ACTACCTCTTCA 
      GTGCCAGCMGCCGCGGTAA 
      Leaves 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      0.168218 
      2.056739 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1997 
      2382.DPOO1.C1.HA.1.628.root.9.12.lane7.NoIndex... 
      AGATGTCCGTCA 
      GTGCCAGCMGCCGCGGTAA 
      Roots 
      HS7 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      6.728780 
      70.312703 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1998 
      2382.DPOO1.C1.HA.1.428.root.4.12.lane7.NoIndex... 
      ATGTTTAGACGG 
      GTGCCAGCMGCCGCGGTAA 
      Roots 
      HS10 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      6.353810 
      50.767095 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
    
      1999 
      2382.DPOO1.C1.HA.1.228.root.9.11.lane1.NoIndex... 
      TAGTGCATTCGG 
      GTGCCAGCMGCCGCGGTAA 
      Roots 
      HS10 
      2382 
      The soil microbiome influences grapevine-assoc... 
      Jack Gilbert 
      10.1128/mBio.02527-14 
      ERP006348 
      ... 
      7.228462 
      83.205352 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
      Not applicable 
     
  
2000 rows × 76 columns
In [45]:
    
# reduce map: keep only the rows whose sample ID is present in BOTH the
# 150 and the 90 distance matrices
in_150 = subset_2k_map['#SampleID'].isin(samples_150)
in_90 = subset_2k_map['#SampleID'].isin(samples_90)
subset_2k_map_90_100 = subset_2k_map.loc[in_150 & in_90]
    
In [46]:
    
# pick N random samples (from those present in both matrices) and write
# their IDs to file, one ID per line
N_SUBSAMPLE = 200     # number of samples carried into the Procrustes comparison
SUBSAMPLE_SEED = 42   # fixed seed: a re-run selects the same samples, so the
                      # filtered matrices, PCoA and Procrustes results are reproducible
(subset_2k_map_90_100
 .sample(n=N_SUBSAMPLE, random_state=SUBSAMPLE_SEED)['#SampleID']
 # header=False is explicit because the Series.to_csv default changed between
 # pandas versions; the file should contain only sample IDs
 .to_csv('./200_samples.txt', index=False, header=False))
    
In [47]:
    
# filter distance matrices by the sample IDs listed in ./200_samples.txt
# (written by the previous cell); both matrices are filtered with the same
# ID list so that they can be compared sample-for-sample afterwards
# 150 matrix -> ./emp_150_gg_1k_unweighted_unifrac.200.txt
!filter_distance_matrix.py -i ./emp_150_gg_1k_unweighted_unifrac.txt \
-o ./emp_150_gg_1k_unweighted_unifrac.200.txt \
--sample_id_fp ./200_samples.txt
# 90 matrix -> ./emp_90_gg_1k_unweighted_unifrac.200.txt
!filter_distance_matrix.py -i ./emp_90_gg_1k_unweighted_unifrac.txt \
-o ./emp_90_gg_1k_unweighted_unifrac.200.txt \
--sample_id_fp ./200_samples.txt
    
In [48]:
    
# compute PC (principal coordinates) for each filtered 200-sample distance
# matrix; results are written next to the inputs with a .pc suffix
# NOTE: both runs emit a RuntimeWarning about negative eigenvalues (see the
# cell output); the warning text reports the smallest and largest eigenvalue
# so their magnitudes can be compared
!principal_coordinates.py -i ./emp_150_gg_1k_unweighted_unifrac.200.txt -o ./emp_150_gg_1k_unweighted_unifrac.200.txt.pc
!principal_coordinates.py -i ./emp_90_gg_1k_unweighted_unifrac.200.txt -o ./emp_90_gg_1k_unweighted_unifrac.200.txt.pc
    
    
/home/jgsanders/miniconda/envs/qiime/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0380954766005 and the largest is 9.52380296681.
  RuntimeWarning
/home/jgsanders/miniconda/envs/qiime/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0348895730356 and the largest is 9.35412012146.
  RuntimeWarning
In [50]:
    
# compute procrustes: compare the two PC files produced above (90 listed
# first, then 150) and write the results to procrustes_results/
# NOTE(review): -r 999 is presumably the number of random (Monte Carlo)
# permutations used for the significance test -- confirm against the QIIME docs
!transform_coordinate_matrices.py -i emp_90_gg_1k_unweighted_unifrac.200.txt.pc,emp_150_gg_1k_unweighted_unifrac.200.txt.pc  \
-r 999 \
-o procrustes_results/
    
In [51]:
    
# make emperor plot of procrustes: reads the transformed coordinates from
# procrustes_results/ and writes the plot to procrustes_results/plots/
# NOTE(review): -c presumably switches Emperor to its coordinate-comparison
# (procrustes) plot mode -- confirm against the Emperor docs
# NOTE(review): the mapping file passed here
# (./emp_qiime_mapping_subset_2k_20170606.tsv) is not the path loaded earlier
# in this notebook (../../data/mapping-files/emp_qiime_mapping_subset_2k.tsv);
# verify that this file exists in the working directory and is the intended one
!make_emperor.py -c -i procrustes_results/ \
-o procrustes_results/plots/ \
-m ./emp_qiime_mapping_subset_2k_20170606.tsv
    
Content source: cuttlefishh/emp
Similar notebooks: