In [ ]:
import pandas as pd
import numpy as np
Note: this notebook operates on the original UniFrac distance matrices, which are very large.
To reproduce, download the following files from ftp://ftp.microbio.me/emp/release1/results/beta_diversity/deblur/:
emp_150_gg_1k_unweighted_unifrac.txt
emp_90_gg_1k_unweighted_unifrac.txt
In [34]:
# Read only the first (header) row of each distance matrix and keep every
# field after the leading one; these fields are the sample IDs in that matrix.
samples_150, samples_90 = (
    pd.read_csv(matrix_fp, sep ='\t', header=None, nrows=1).iloc[0, 1:]
    for matrix_fp in ('./emp_150_gg_1k_unweighted_unifrac.txt',
                      './emp_90_gg_1k_unweighted_unifrac.txt')
)
In [18]:
# Load the tab-separated sample mapping file (first row holds the column
# names) and preview its first rows.
subset_2k_map = pd.read_csv(
    '../../data/mapping-files/emp_qiime_mapping_subset_2k.tsv',
    sep='\t',
    header=0,
)
subset_2k_map.head()
Out[18]:
#SampleID
BarcodeSequence
LinkerPrimerSequence
Description
host_subject_id
study_id
title
principal_investigator
doi
ebi_accession
...
adiv_shannon
adiv_faith_pd
temperature_deg_c
ph
salinity_psu
oxygen_mg_per_l
phosphate_umol_per_l
ammonium_umol_per_l
nitrate_umol_per_l
sulfate_umol_per_l
0
550.L1S116.s.1.sequence
ATGCCTGAGCAG
GTGCCAGCMGCCGCGGTAA
sample_20 stool
F4
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.867414
12.457989
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1
550.L1S119.s.1.sequence
CAGCACTAAGCG
GTGCCAGCMGCCGCGGTAA
sample_23 stool
F4
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.265164
10.719448
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
2
550.L1S164.s.1.sequence
ATGTACGGCGAC
GTGCCAGCMGCCGCGGTAA
sample_73 stool
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.661124
14.214158
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
3
550.L1S194.s.1.sequence
CGAAGACTGCTG
GTGCCAGCMGCCGCGGTAA
sample_105 stool
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
4.439943
12.012602
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
4
550.L1S20.s.1.sequence
ACGGTGAGTGTC
GTGCCAGCMGCCGCGGTAA
sample_112 stool
F4
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.327601
11.758069
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
5
550.L1S26.s.1.sequence
ACAGCAGTGGTC
GTGCCAGCMGCCGCGGTAA
sample_175 stool
F4
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.272025
10.390166
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
6
550.L1S264.s.1.sequence
CTGTATCGTATG
GTGCCAGCMGCCGCGGTAA
sample_180 stool
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
5.732025
19.540809
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
7
550.L1S273.s.1.sequence
CCTAGTACTGAT
GTGCCAGCMGCCGCGGTAA
sample_189 stool
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
4.191533
14.265016
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
8
550.L2S103.s.2.sequence
AGCACGAGCCTA
GTGCCAGCMGCCGCGGTAA
sample_380 sebum
F4
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
7.570207
52.371958
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
9
550.L2S144.s.2.sequence
AGTACGCTCGAG
GTGCCAGCMGCCGCGGTAA
sample_425 sebum
F4
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
6.181649
38.653723
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
10
550.L2S164.s.2.sequence
ACTACAGCCTAT
GTGCCAGCMGCCGCGGTAA
sample_447 sebum
F4
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
5.828009
37.769790
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
11
550.L2S166.s.2.sequence
AGATCGGCTCGA
GTGCCAGCMGCCGCGGTAA
sample_449 sebum
F4
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
5.952527
42.317611
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
12
550.L2S303.s.2.sequence
CTCATGTACAGT
GTGCCAGCMGCCGCGGTAA
sample_597 sebum
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
1.868610
17.649715
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
13
550.L2S377.s.2.sequence
CCTCTCGTGATC
GTGCCAGCMGCCGCGGTAA
sample_676 sebum
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
6.057640
35.644789
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
14
550.L4S1.s.4.sequence
CATGGCTACACA
GTGCCAGCMGCCGCGGTAA
sample_1111 sebum
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
4.850969
34.136518
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
15
550.L4S102.s.4.sequence
GCAATAGCTGCT
GTGCCAGCMGCCGCGGTAA
sample_1115 sebum
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
7.066361
53.006615
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
16
550.L4S122.s.4.sequence
GACCACTACGAT
GTGCCAGCMGCCGCGGTAA
sample_1137 sebum
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
4.212268
25.715900
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
17
550.L4S140.s.4.sequence
GAGTCTGAGTCT
GTGCCAGCMGCCGCGGTAA
sample_1157 sebum
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
2.630993
13.920620
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
18
550.L4S158.s.4.sequence
GCAGGATAGATA
GTGCCAGCMGCCGCGGTAA
sample_1176 sebum
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
5.616810
30.118912
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
19
550.L4S161.s.4.sequence
GAAGTCTCGCAT
GTGCCAGCMGCCGCGGTAA
sample_1180 sebum
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
7.304861
55.092754
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
20
550.L4S183.s.4.sequence
GCGGATGTGACT
GTGCCAGCMGCCGCGGTAA
sample_1204 sebum
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
4.891649
24.036843
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
21
550.L5S119.s.5.sequence
CAGCACTAAGCG
GTGCCAGCMGCCGCGGTAA
sample_1403 saliva
F4
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.647179
9.600555
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
22
550.L5S224.s.5.sequence
CTGCAGTACTTA
GTGCCAGCMGCCGCGGTAA
sample_1519 saliva
F4
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.869599
10.713225
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
23
550.L5S258.s.5.sequence
CGAGTCTAGTTG
GTGCCAGCMGCCGCGGTAA
sample_1556 saliva
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.841479
10.921526
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
24
550.L6S104.s.6.sequence
TAGATCCTCGAT
GTGCCAGCMGCCGCGGTAA
sample_1653 saliva
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
4.329496
11.399017
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
25
550.L6S144.s.6.sequence
TAGCGGATCACG
GTGCCAGCMGCCGCGGTAA
sample_1695 saliva
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.687083
10.642233
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
26
550.L6S149.s.6.sequence
GTGTCTACATTG
GTGCCAGCMGCCGCGGTAA
sample_1700 saliva
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
4.226336
12.062299
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
27
550.L6S176.s.6.sequence
TAGTCGTCTAGT
GTGCCAGCMGCCGCGGTAA
sample_1730 saliva
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.991563
11.172998
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
28
550.L6S186.s.6.sequence
GTAGCTGACGCA
GTGCCAGCMGCCGCGGTAA
sample_1741 saliva
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
4.099716
11.555792
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
29
550.L6S197.s.6.sequence
ACTCACGGTATG
GTGCCAGCMGCCGCGGTAA
sample_1752 saliva
M3
550
Moving pictures of the human microbiome
Rob Knight
10.1186/gb-2011-12-5-r50
ERP021896
...
3.953951
10.386101
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1970
2382.GM.181.R4.leav.10.12.lane8.NoIndex.L008.s...
CGATGTGTGGTT
GTGCCAGCMGCCGCGGTAA
Leaves
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
1.335486
7.612504
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1971
2382.GM.181.R4.gp.10.12.lane8.NoIndex.L008.seq...
GATCCTCATGCG
GTGCCAGCMGCCGCGGTAA
Grapes
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.316466
5.129634
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1972
2382.GM.181.R3.rhizo.10.12.lane7.NoIndex.L007....
TATCCAAGCGCA
GTGCCAGCMGCCGCGGTAA
Rhizosphere
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
9.029518
113.700721
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1973
2382.GM.181.R3.leav.10.12.lane8.NoIndex.L008.s...
TCCATCGACGTG
GTGCCAGCMGCCGCGGTAA
Leaves
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.336742
4.742170
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1974
2382.GM.181.R3.gp.10.12.lane8.NoIndex.L008.seq...
AGACATACCGTA
GTGCCAGCMGCCGCGGTAA
Grapes
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.121966
4.951791
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1975
2382.GM.181.R2.root.10.12.lane7.NoIndex.L007.s...
GGCATGTTATCG
GTGCCAGCMGCCGCGGTAA
Roots
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
6.523943
48.856413
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1976
2382.GM.181.R2.rhizo.10.12.lane7.NoIndex.L007....
ACCAATCTCGGC
GTGCCAGCMGCCGCGGTAA
Rhizosphere
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
8.553171
98.373702
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1977
2382.GM.181.R2.leav.10.12.lane8.NoIndex.L008.s...
AGCTCTAGAAAC
GTGCCAGCMGCCGCGGTAA
Leaves
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.031598
4.116123
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1978
2382.GM.181.R2.gp.10.12.lane8.NoIndex.L008.seq...
GATCATTCTCTC
GTGCCAGCMGCCGCGGTAA
Grapes
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.144597
4.259524
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1979
2382.GM.181.R1.leav.10.12.lane8.NoIndex.L008.s...
CCAGACCGCTAT
GTGCCAGCMGCCGCGGTAA
Leaves
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
1.098354
8.048152
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1980
2382.GM.181.R1.gp.10.12.lane8.NoIndex.L008.seq...
TCTGAGGTTGCC
GTGCCAGCMGCCGCGGTAA
Grapes
HS4
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.049765
4.818847
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1981
2382.DPOO1.C1.HA.5.650.gp.9.12.lane8.NoIndex.L...
ACCTTACACCTT
GTGCCAGCMGCCGCGGTAA
Grapes
HS3
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.172636
4.406474
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1982
2382.DPOO1.C1.HA.5.649.leav.9.12.lane8.NoIndex...
AACCATGCCAAC
GTGCCAGCMGCCGCGGTAA
Leaves
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.020367
3.148375
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1983
2382.DPOO1.C1.HA.5.648.root.9.12.lane7.NoIndex...
GAGGTTCTTGAC
GTGCCAGCMGCCGCGGTAA
Roots
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
6.035013
54.134147
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1984
2382.DPOO1.C1.HA.5.448.root.4.12.lane7.NoIndex...
AGCCTCATGATG
GTGCCAGCMGCCGCGGTAA
Roots
HS10
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
6.658723
61.613640
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1985
2382.DPOO1.C1.HA.4.645.gp.9.12.lane8.NoIndex.L...
ACTAGCGTTCAG
GTGCCAGCMGCCGCGGTAA
Grapes
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.047735
2.771104
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1986
2382.DPOO1.C1.HA.4.644.leav.9.12.lane8.NoIndex...
GGTAAGTTTGAC
GTGCCAGCMGCCGCGGTAA
Leaves
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.018037
1.768118
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1987
2382.DPOO1.C1.HA.3.640.gp.9.12.lane8.NoIndex.L...
CCGAAGATTCTG
GTGCCAGCMGCCGCGGTAA
Grapes
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.096775
4.252410
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1988
2382.DPOO1.C1.HA.3.639.leav.9.12.lane8.NoIndex...
TGGAATTCGGCT
GTGCCAGCMGCCGCGGTAA
Leaves
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.047049
3.368435
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1989
2382.DPOO1.C1.HA.3.32.r1.leav.6.11.lane8.NoInd...
TGTATCTTCACC
GTGCCAGCMGCCGCGGTAA
Leaves
HS9
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.040234
5.070915
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1990
2382.DPOO1.C1.HA.3.238.root.9.11.lane1.NoIndex...
ATCGTGTGTTGG
GTGCCAGCMGCCGCGGTAA
Roots
HS10
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
7.417508
76.719597
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1991
2382.DPOO1.C1.HA.2.635.gp.9.12.lane8.NoIndex.L...
GGCGTTGCATTC
GTGCCAGCMGCCGCGGTAA
Grapes
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.015675
2.704737
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1992
2382.DPOO1.C1.HA.2.634.leav.9.12.lane8.NoIndex...
TCTTCAACTACC
GTGCCAGCMGCCGCGGTAA
Leaves
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.088071
2.056739
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1993
2382.DPOO1.C1.HA.2.633.root.9.12.lane7.NoIndex...
ATGTCACCGCTG
GTGCCAGCMGCCGCGGTAA
Roots
HS1
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
7.351779
75.716918
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1994
2382.DPOO1.C1.HA.2.433.root.4.12.lane7.NoIndex...
CCAAACTCGTCG
GTGCCAGCMGCCGCGGTAA
Roots
HS10
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
7.070601
69.265327
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1995
2382.DPOO1.C1.HA.1.630.gp.9.12.lane8.NoIndex.L...
TTGCGACAAAGT
GTGCCAGCMGCCGCGGTAA
Grapes
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.092204
3.604761
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1996
2382.DPOO1.C1.HA.1.629.leav.9.12.lane8.NoIndex...
ACTACCTCTTCA
GTGCCAGCMGCCGCGGTAA
Leaves
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
0.168218
2.056739
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1997
2382.DPOO1.C1.HA.1.628.root.9.12.lane7.NoIndex...
AGATGTCCGTCA
GTGCCAGCMGCCGCGGTAA
Roots
HS7
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
6.728780
70.312703
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1998
2382.DPOO1.C1.HA.1.428.root.4.12.lane7.NoIndex...
ATGTTTAGACGG
GTGCCAGCMGCCGCGGTAA
Roots
HS10
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
6.353810
50.767095
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
1999
2382.DPOO1.C1.HA.1.228.root.9.11.lane1.NoIndex...
TAGTGCATTCGG
GTGCCAGCMGCCGCGGTAA
Roots
HS10
2382
The soil microbiome influences grapevine-assoc...
Jack Gilbert
10.1128/mBio.02527-14
ERP006348
...
7.228462
83.205352
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
Not applicable
2000 rows × 76 columns
In [45]:
# Restrict the mapping table to rows whose #SampleID appears in BOTH the
# emp_150 and the emp_90 distance matrices.
subset_2k_map_90_100 = subset_2k_map[
    subset_2k_map['#SampleID'].isin(samples_150)
    & subset_2k_map['#SampleID'].isin(samples_90)
]
In [46]:
# Draw 200 random rows from the shared-sample mapping table and write their
# sample IDs, one per line, to ./200_samples.txt.
# random_state pins the draw: without it every re-run selects a different set
# of samples, so the filtered matrices, ordinations and Procrustes result
# computed from this file could not be reproduced.
# header=False keeps the file a bare ID list regardless of pandas version
# (Series.to_csv started writing a header row by default in pandas 0.24).
subset_2k_map_90_100.sample(n=200, random_state=42)['#SampleID'].to_csv(
    './200_samples.txt', index=False, header=False)
In [47]:
# Filter each full distance matrix by the sample IDs listed in
# ./200_samples.txt, writing the reduced matrix next to the input with a
# ".200.txt" suffix.
# NOTE(review): filter_distance_matrix.py is an external command-line script
# (presumably from the QIIME 1 environment seen in the recorded warnings);
# it must be on PATH for these shell escapes to work.
!filter_distance_matrix.py -i ./emp_150_gg_1k_unweighted_unifrac.txt \
-o ./emp_150_gg_1k_unweighted_unifrac.200.txt \
--sample_id_fp ./200_samples.txt
# Same filter, applied to the emp_90 matrix.
!filter_distance_matrix.py -i ./emp_90_gg_1k_unweighted_unifrac.txt \
-o ./emp_90_gg_1k_unweighted_unifrac.200.txt \
--sample_id_fp ./200_samples.txt
In [48]:
# Compute principal coordinates for each filtered (".200.txt") distance
# matrix; each result is written to the input path plus a ".pc" suffix.
# The recorded run emitted a scikit-bio RuntimeWarning about negative
# eigenvalues; in that run they were small (about -0.04) relative to the
# largest positive eigenvalue (about 9.4-9.5).
!principal_coordinates.py -i ./emp_150_gg_1k_unweighted_unifrac.200.txt -o ./emp_150_gg_1k_unweighted_unifrac.200.txt.pc
!principal_coordinates.py -i ./emp_90_gg_1k_unweighted_unifrac.200.txt -o ./emp_90_gg_1k_unweighted_unifrac.200.txt.pc
/home/jgsanders/miniconda/envs/qiime/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0380954766005 and the largest is 9.52380296681.
RuntimeWarning
/home/jgsanders/miniconda/envs/qiime/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0348895730356 and the largest is 9.35412012146.
RuntimeWarning
In [50]:
# Compute the Procrustes comparison of the two principal-coordinate files
# (emp_90 listed first, emp_150 second); output goes to procrustes_results/.
# NOTE(review): -r 999 presumably sets the number of random permutations used
# for the significance test — confirm against the script's --help.
!transform_coordinate_matrices.py -i emp_90_gg_1k_unweighted_unifrac.200.txt.pc,emp_150_gg_1k_unweighted_unifrac.200.txt.pc \
-r 999 \
-o procrustes_results/
In [51]:
# Build an Emperor plot from the contents of procrustes_results/ and write it
# to procrustes_results/plots/.
# NOTE(review): -c presumably switches Emperor to its coordinate-comparison
# (Procrustes) mode — confirm against the script's --help.
# NOTE(review): the mapping file passed via -m
# (./emp_qiime_mapping_subset_2k_20170606.tsv) is a different path from the
# one loaded with pandas earlier in this notebook
# (../../data/mapping-files/emp_qiime_mapping_subset_2k.tsv) — confirm which
# file is intended and that it exists in the working directory.
!make_emperor.py -c -i procrustes_results/ \
-o procrustes_results/plots/ \
-m ./emp_qiime_mapping_subset_2k_20170606.tsv
Content source: cuttlefishh/emp
Similar notebooks: