In [1]:
import bacteriopop_utils
import feature_selection_utils
import load_data
In [2]:
# Load the dataset through the project-local load_data module; later cells read from `loaded_data`.
loaded_data = load_data.load_data()
In [3]:
# Size check: (rows, columns) of the freshly loaded frame.
loaded_data.shape
Out[3]:
(64755, 11)
In [4]:
# Preview the first rows to see the column layout (taxonomy levels, sample descriptors, abundance).
loaded_data.head()
Out[4]:
kingdom
phylum
class
order
family
genus
length
oxygen
replicate
week
abundance
sampleID
1056013
Bacteria
Proteobacteria
Gammaproteobacteria
Methylococcales
Methylococcaceae
Methylobacter
9948861
Low
1
4
0.228531
1056013
Bacteria
Proteobacteria
Betaproteobacteria
Methylophilales
Methylophilaceae
Methylotenera
5066955
Low
1
4
0.220860
1056013
Bacteria
Bacteroidetes
Flavobacteriia
Flavobacteriales
Flavobacteriaceae
Flavobacterium
4654774
Low
1
4
0.054719
1056013
Bacteria
Proteobacteria
Gammaproteobacteria
Methylococcales
Methylococcaceae
3046340
Low
1
4
0.047956
1056013
Bacteria
Proteobacteria
Gammaproteobacteria
5620690
Low
1
4
0.040903
How many taxa are recorded in each week/oxygen/replicate group?
In [38]:
# Count the abundance entries (one per taxon row) in every week/oxygen/replicate sample.
taxa_per_sample = (
    loaded_data
    .groupby(['week', 'oxygen', 'replicate'])['abundance']
    .count()
)
In [40]:
# Show the first 20 per-sample taxon counts.
taxa_per_sample.head(20)
Out[40]:
week oxygen replicate
4 High 1 877
2 751
3 931
4 748
Low 1 791
2 833
3 803
4 766
5 High 1 931
2 728
3 635
4 928
Low 1 851
2 735
3 786
4 684
6 High 1 911
2 642
3 791
4 808
Name: abundance, dtype: int64
In [ ]:
In [5]:
# Scratch cell: sanity check of abs(); the result is not assigned or used — candidate for removal.
abs(-0.01)
Out[5]:
0.01
In [6]:
# Total abundance per sample, where a sample is one oxygen/replicate/week combination.
sample_abundance_sums = (
    loaded_data
    .groupby(['oxygen', 'replicate', 'week'])['abundance']
    .sum()
)
In [7]:
# Look for per-sample totals that are not within 0.001 of 1.
# The check is two-sided: a one-sided `v < 0.999` test would miss totals above 1.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print([v for v in sample_abundance_sums.unique() if abs(v - 1) > 0.001])
# An empty list is good: every sample sums to ~1.
[]
Implement this check as a function that returns True if the abundances in every oxygen/replicate/week group sum to 1.
In [8]:
# Run the packaged sum-to-1 check on the untouched data; True is the expected result.
bacteriopop_utils.check_abundances_sums(loaded_data)
all groups of oxygen/week/replicate have abundances that sum to 1
Out[8]:
True
Make a negative control by deleting the first three rows of the dataframe.
In [9]:
# Re-display the head so the rows about to be dropped for the negative control are visible.
loaded_data.head()
Out[9]:
kingdom
phylum
class
order
family
genus
length
oxygen
replicate
week
abundance
sampleID
1056013
Bacteria
Proteobacteria
Gammaproteobacteria
Methylococcales
Methylococcaceae
Methylobacter
9948861
Low
1
4
0.228531
1056013
Bacteria
Proteobacteria
Betaproteobacteria
Methylophilales
Methylophilaceae
Methylotenera
5066955
Low
1
4
0.220860
1056013
Bacteria
Bacteroidetes
Flavobacteriia
Flavobacteriales
Flavobacteriaceae
Flavobacterium
4654774
Low
1
4
0.054719
1056013
Bacteria
Proteobacteria
Gammaproteobacteria
Methylococcales
Methylococcaceae
3046340
Low
1
4
0.047956
1056013
Bacteria
Proteobacteria
Gammaproteobacteria
5620690
Low
1
4
0.040903
In [10]:
# Negative control: drop the first three rows so one sample no longer sums to 1.
# Slice first, then copy: this yields an independent frame in a single, re-runnable
# statement, without duplicating all of loaded_data and then rebinding the name
# to a slice of that temporary copy.
broken_data = loaded_data.iloc[3:].copy()
In [11]:
# The check should fail here: the truncated frame has a sample whose abundances no longer sum to 1.
bacteriopop_utils.check_abundances_sums(broken_data)
number of samples that don't sum to 1: 1
Out[11]:
False
In [12]:
# Confirm that building the negative control did not shorten the original frame.
loaded_data.shape
Out[12]:
(64755, 11)
In [13]:
# The negative control should have exactly three rows fewer than loaded_data.
broken_data.shape
Out[13]:
(64752, 11)
In [14]:
# Aggregate abundances at the 'order' level with the project helper.
apl = bacteriopop_utils.aggregate_on_phylo_level(loaded_data, 'order')
In [15]:
# Preview the aggregated result (indexed by sample descriptors and taxonomy down to order).
apl.head()
Out[15]:
abundance
oxygen
replicate
week
kingdom
phylum
class
order
High
1
4
Archaea
0.000028
Crenarchaeota
Thermoprotei
0.000026
Acidilobales
0.000007
Desulfurococcales
0.000005
Sulfolobales
0.000015
In [16]:
# Aggregation should leave per-sample totals unchanged, so the sum-to-1 check should still pass.
bacteriopop_utils.check_abundances_sums(apl)
all groups of oxygen/week/replicate have abundances that sum to 1
Out[16]:
True
In [17]:
# Apply the project's abundance filter with low=0.01 and preview the result.
# NOTE(review): the preview has no 'length' column, and loaded_data.shape later
# reports 10 columns instead of 11. Check whether this helper (or reduce_data)
# drops the column from loaded_data in place; if so, pass loaded_data.copy().
fba = bacteriopop_utils.filter_by_abundance(loaded_data, low = 0.01)
fba.head()
first (up to) 5 phylo columns to
keep: ['Methylobacter' 'Methylotenera' 'Flavobacterium' '' 'Acidovorax']
Out[17]:
kingdom
phylum
class
order
family
genus
oxygen
replicate
week
abundance
sampleID
1056013
Bacteria
Proteobacteria
Gammaproteobacteria
Methylococcales
Methylococcaceae
Methylobacter
Low
1
4
0.228531
1056013
Bacteria
Proteobacteria
Betaproteobacteria
Methylophilales
Methylophilaceae
Methylotenera
Low
1
4
0.220860
1056013
Bacteria
Bacteroidetes
Flavobacteriia
Flavobacteriales
Flavobacteriaceae
Flavobacterium
Low
1
4
0.054719
1056013
Bacteria
Proteobacteria
Gammaproteobacteria
Methylococcales
Methylococcaceae
Low
1
4
0.047956
1056013
Bacteria
Proteobacteria
Gammaproteobacteria
Low
1
4
0.040903
In [18]:
# After filtering, per-sample totals are expected to fall below 1, so False is the expected result.
bacteriopop_utils.check_abundances_sums(fba)
number of samples that don't sum to 1: 88
Out[18]:
False
We expected filter_by_abundance() to make the abundances in each sample sum to a number less than 1.
Also make sure the "other" category, which is now labelled "" (an empty string), is still present.
In [19]:
# List the genus labels remaining after the filter; the empty string '' should be among them.
fba['genus'].unique()
Out[19]:
array(['Methylobacter', 'Methylotenera', 'Flavobacterium', '',
'Acidovorax', 'Dechloromonas', 'Methylomonas', 'Methylosarcina',
'Methylomicrobium', 'Methylovulum', 'Methyloglobulus',
'Pseudomonas', 'Sorangium', 'Burkholderia', 'Polaromonas',
'Methylovorus', 'Streptomyces', 'Methylophilus', 'Curvibacter',
'Comamonas', 'Mycobacterium', 'Desulfobacter', 'Rheinheimera',
'Azospirillum', 'Hylemonella', 'Bdellovibrio', 'Delftia',
'Lactobacillus', 'Flectobacillus', 'Bosea', 'Bacteriovorax'], dtype=object)
In [20]:
# Reduce the data at the 'order' level with min_abundance=0.05, then preview 15 rows.
rd_order = bacteriopop_utils.reduce_data(
    dataframe=loaded_data,
    min_abundance=0.05,
    phylo_column='order',
)
rd_order.head(15)
all groups of oxygen/week/replicate have abundances that sum to 1
columns after aggregating on phylo level: Index([u'oxygen', u'replicate', u'week', u'kingdom', u'phylum', u'class',
u'order', u'abundance'],
dtype='object')
first (up to) 5 phylo columns to
keep: ['Flavobacteriales' 'Burkholderiales' 'Methylophilales' 'Methylococcales'
'Actinomycetales']
Out[20]:
oxygen
replicate
week
kingdom
phylum
class
order
abundance
0
High
1
4
Archaea
0.000028
1
High
1
4
Archaea
Crenarchaeota
Thermoprotei
0.000026
6
High
1
4
Archaea
Euryarchaeota
0.000032
8
High
1
4
Archaea
Euryarchaeota
Halobacteria
0.000029
16
High
1
4
Archaea
Thaumarchaeota
0.000004
17
High
1
4
Bacteria
0.020489
18
High
1
4
Bacteria
Acidobacteria
0.000041
21
High
1
4
Bacteria
Actinobacteria
0.000009
22
High
1
4
Bacteria
Actinobacteria
Actinobacteria
0.000062
24
High
1
4
Bacteria
Actinobacteria
Actinobacteria
Actinomycetales
0.008812
30
High
1
4
Bacteria
Aquificae
Aquificae
0.000007
32
High
1
4
Bacteria
Armatimonadetes
0.000095
34
High
1
4
Bacteria
Bacteroidetes
0.016066
36
High
1
4
Bacteria
Bacteroidetes
Cytophagia
0.000065
38
High
1
4
Bacteria
Bacteroidetes
Flavobacteriia
0.000015
In [21]:
# Size of the order-level reduced frame, for comparison with the raw data.
rd_order.shape
Out[21]:
(3107, 8)
In [22]:
# NOTE(review): this now reports 10 columns, while the same call earlier reported 11.
# Some cell in between appears to have modified loaded_data in place — confirm which
# helper does this, since it makes the notebook sensitive to execution order.
loaded_data.shape
Out[22]:
(64755, 10)
In [23]:
# Reduce the data at the 'family' level, using the same min_abundance (0.05)
# as the order-level run, then report the size and preview the result.
rd_family = bacteriopop_utils.reduce_data(
    dataframe=loaded_data,
    min_abundance=0.05,
    phylo_column='family',
)
# print(...) with a single argument produces the same output on Python 2 and 3.
print(rd_family.shape)
rd_family.head()
all groups of oxygen/week/replicate have abundances that sum to 1
columns after aggregating on phylo level: Index([u'oxygen', u'replicate', u'week', u'kingdom', u'phylum', u'class',
u'order', u'family', u'abundance'],
dtype='object')
first (up to) 5 phylo columns to
keep: ['Flavobacteriaceae' 'Comamonadaceae' 'Methylophilaceae' 'Methylococcaceae'
'']
(5564, 9)
Out[23]:
oxygen
replicate
week
kingdom
phylum
class
order
family
abundance
0
High
1
4
Archaea
0.000028
1
High
1
4
Archaea
Crenarchaeota
Thermoprotei
0.000026
5
High
1
4
Archaea
Crenarchaeota
Thermoprotei
Thermoproteales
0.000005
8
High
1
4
Archaea
Euryarchaeota
0.000032
10
High
1
4
Archaea
Euryarchaeota
Halobacteria
0.000029
In [24]:
# Split the order-level data into a dictionary of frames; judging by the printed
# output, the keys are (oxygen, replicate) tuples.
bae = bacteriopop_utils.break_apart_experiments(rd_order.reset_index())
('High', 1)
('High', 2)
('High', 3)
('High', 4)
('Low', 1)
('Low', 2)
('Low', 3)
('Low', 4)
dictionary keys: [('High', 4), ('Low', 1), ('High', 3), ('Low', 2), ('High', 2), ('Low', 3), ('High', 1), ('Low', 4)]
In [25]:
# Inspect the dictionary keys: one (oxygen, replicate) tuple per experiment.
bae.keys()
Out[25]:
[('High', 4),
('Low', 1),
('High', 3),
('Low', 2),
('High', 2),
('Low', 3),
('High', 1),
('Low', 4)]
In [26]:
# Inspect the ('High', 4) experiment, addressed by key rather than bae.values()[0].
# Positional access depends on the dictionary's ordering and raises TypeError on
# Python 3, where dict.values() returns a non-indexable view. ('High', 4) is the
# entry the positional lookup returned (first key listed by bae.keys()).
bae[('High', 4)]
Out[26]:
index
oxygen
replicate
week
kingdom
phylum
class
order
abundance
1184
4026
High
4
4
Archaea
Euryarchaeota
0.000012
1185
4034
High
4
4
Archaea
Thaumarchaeota
0.000010
1186
4035
High
4
4
Bacteria
0.023130
1187
4036
High
4
4
Bacteria
Acidobacteria
0.000063
1188
4040
High
4
4
Bacteria
Actinobacteria
0.000004
1189
4041
High
4
4
Bacteria
Actinobacteria
Actinobacteria
0.000005
1190
4043
High
4
4
Bacteria
Actinobacteria
Actinobacteria
Actinomycetales
0.002334
1191
4048
High
4
4
Bacteria
Aquificae
Aquificae
0.000008
1192
4050
High
4
4
Bacteria
Armatimonadetes
0.000017
1193
4052
High
4
4
Bacteria
Bacteroidetes
0.009136
1194
4054
High
4
4
Bacteria
Bacteroidetes
Cytophagia
0.000023
1195
4056
High
4
4
Bacteria
Bacteroidetes
Flavobacteriia
0.000027
1196
4057
High
4
4
Bacteria
Bacteroidetes
Flavobacteriia
Flavobacteriales
0.025151
1197
4061
High
4
4
Bacteria
Chloroflexi
0.000115
1198
4066
High
4
4
Bacteria
Cloacimonetes
0.000036
1199
4067
High
4
4
Bacteria
Cyanobacteria
0.004914
1200
4072
High
4
4
Bacteria
Fibrobacteres
0.000038
1201
4073
High
4
4
Bacteria
Firmicutes
0.000091
1202
4074
High
4
4
Bacteria
Firmicutes
Bacilli
0.000046
1203
4083
High
4
4
Bacteria
Gemmatimonadetes
Gemmatimonadetes
0.000166
1204
4089
High
4
4
Bacteria
Proteobacteria
0.032780
1205
4090
High
4
4
Bacteria
Proteobacteria
Alphaproteobacteria
0.000294
1206
4099
High
4
4
Bacteria
Proteobacteria
Betaproteobacteria
0.008793
1207
4100
High
4
4
Bacteria
Proteobacteria
Betaproteobacteria
Burkholderiales
0.171132
1208
4103
High
4
4
Bacteria
Proteobacteria
Betaproteobacteria
Methylophilales
0.315817
1209
4107
High
4
4
Bacteria
Proteobacteria
Deltaproteobacteria
0.001556
1210
4108
High
4
4
Bacteria
Proteobacteria
Deltaproteobacteria
Bdellovibrionales
0.037076
1211
4116
High
4
4
Bacteria
Proteobacteria
Epsilonproteobacteria
0.000244
1212
4118
High
4
4
Bacteria
Proteobacteria
Gammaproteobacteria
0.021199
1213
4125
High
4
4
Bacteria
Proteobacteria
Gammaproteobacteria
Methylococcales
0.236897
...
...
...
...
...
...
...
...
...
...
1540
5226
High
4
13
unassigned
0.098949
1541
5230
High
4
14
Archaea
Euryarchaeota
Methanomicrobia
0.000003
1542
5234
High
4
14
Archaea
Thaumarchaeota
0.000002
1543
5235
High
4
14
Bacteria
0.002712
1544
5236
High
4
14
Bacteria
Acidobacteria
0.000004
1545
5240
High
4
14
Bacteria
Actinobacteria
0.000002
1546
5242
High
4
14
Bacteria
Actinobacteria
Actinobacteria
Actinomycetales
0.000461
1547
5248
High
4
14
Bacteria
Bacteroidetes
0.000289
1548
5251
High
4
14
Bacteria
Bacteroidetes
Flavobacteriia
0.000002
1549
5252
High
4
14
Bacteria
Bacteroidetes
Flavobacteriia
Flavobacteriales
0.008751
1550
5254
High
4
14
Bacteria
Candidatus Saccharibacteria
0.000004
1551
5258
High
4
14
Bacteria
Chloroflexi
Ktedonobacteria
0.000004
1552
5262
High
4
14
Bacteria
Cloacimonetes
0.000002
1553
5263
High
4
14
Bacteria
Cyanobacteria
0.001999
1554
5274
High
4
14
Bacteria
Gemmatimonadetes
Gemmatimonadetes
0.000007
1555
5280
High
4
14
Bacteria
Proteobacteria
0.012295
1556
5281
High
4
14
Bacteria
Proteobacteria
Alphaproteobacteria
0.000083
1557
5290
High
4
14
Bacteria
Proteobacteria
Betaproteobacteria
0.003799
1558
5291
High
4
14
Bacteria
Proteobacteria
Betaproteobacteria
Burkholderiales
0.128910
1559
5294
High
4
14
Bacteria
Proteobacteria
Betaproteobacteria
Methylophilales
0.232417
1560
5298
High
4
14
Bacteria
Proteobacteria
Deltaproteobacteria
0.000012
1561
5299
High
4
14
Bacteria
Proteobacteria
Deltaproteobacteria
Bdellovibrionales
0.000003
1562
5306
High
4
14
Bacteria
Proteobacteria
Gammaproteobacteria
0.007299
1563
5313
High
4
14
Bacteria
Proteobacteria
Gammaproteobacteria
Methylococcales
0.443425
1564
5316
High
4
14
Bacteria
Proteobacteria
Gammaproteobacteria
Pseudomonadales
0.008739
1565
5324
High
4
14
Bacteria
Tenericutes
Mollicutes
0.000002
1566
5326
High
4
14
Bacteria
Verrucomicrobia
0.000004
1567
5327
High
4
14
Bacteria
Verrucomicrobia
Opitutae
0.000010
1568
5330
High
4
14
Eukaryota
Mollusca
0.000003
1569
5331
High
4
14
unassigned
0.116118
386 rows × 9 columns
In [27]:
# Pivot the ('High', 4) experiment into a taxa-by-week abundance matrix.
# The frame is looked up by key: bae.values()[0] depends on dict ordering and is
# not valid on Python 3 (dict views cannot be indexed).
bacteriopop_utils.pivot_for_abundance_matrix(
    bae[('High', 4)].reset_index())
Out[27]:
week
4
5
6
7
8
9
10
11
12
13
14
phylo_concat
Archaea,,,
NaN
0.000018
0.000004
NaN
NaN
0.000094
NaN
1.200000e-06
NaN
NaN
NaN
Archaea,Crenarchaeota,,
NaN
0.000008
NaN
0.000002
NaN
0.000012
NaN
NaN
NaN
NaN
NaN
Archaea,Crenarchaeota,Thermoprotei,
NaN
0.000007
NaN
NaN
NaN
0.000030
NaN
NaN
NaN
NaN
NaN
Archaea,Euryarchaeota,,
0.000012
0.000019
0.000006
0.000005
0.000005
0.000138
3.450000e-06
1.200000e-06
NaN
0.000003
NaN
Archaea,Euryarchaeota,Halobacteria,
NaN
NaN
NaN
NaN
NaN
0.000004
NaN
NaN
NaN
NaN
NaN
Archaea,Euryarchaeota,Methanomicrobia,
NaN
0.000015
0.000003
NaN
0.000002
0.000014
6.540000e-07
1.540000e-06
NaN
NaN
0.000003
Archaea,Thaumarchaeota,,
0.000010
0.000023
NaN
NaN
NaN
0.000047
NaN
NaN
NaN
0.000002
0.000002
Bacteria,,,
0.023130
0.013129
0.007104
0.008717
0.008218
0.012571
5.797073e-03
4.896957e-03
0.004307
0.003573
0.002712
Bacteria,Acidobacteria,,
0.000063
0.000050
0.000010
0.000023
0.000070
0.000114
1.220000e-05
3.030000e-05
0.000005
0.000004
0.000004
Bacteria,Actinobacteria,,
0.000004
0.000006
NaN
0.000003
0.000002
0.000028
NaN
NaN
0.000002
0.000002
0.000002
Bacteria,Actinobacteria,Actinobacteria,
0.000005
0.000023
0.000002
0.000018
0.000016
0.000215
NaN
NaN
0.000004
NaN
NaN
Bacteria,Actinobacteria,Actinobacteria,Actinomycetales
0.002334
0.004005
0.002102
0.001994
0.001931
0.021981
2.202130e-03
2.250969e-03
0.002309
0.000522
0.000461
Bacteria,Aquificae,Aquificae,
0.000008
0.000010
0.000002
0.000003
NaN
0.000008
NaN
NaN
NaN
NaN
NaN
Bacteria,Armatimonadetes,,
0.000017
0.000016
0.000014
0.000010
0.000007
0.000056
1.910000e-06
1.240000e-05
0.000002
0.000004
NaN
Bacteria,Bacteroidetes,,
0.009136
0.000353
0.000632
0.003372
0.001292
0.002262
3.618330e-04
1.255900e-04
0.000395
0.000176
0.000289
Bacteria,Bacteroidetes,Cytophagia,
0.000023
0.000021
0.000008
0.000003
0.000004
0.000057
1.840000e-06
6.030000e-06
0.000002
NaN
NaN
Bacteria,Bacteroidetes,Flavobacteriia,
0.000027
0.000003
0.000003
0.000011
0.000004
0.000004
1.730000e-06
2.010000e-06
NaN
NaN
0.000002
Bacteria,Bacteroidetes,Flavobacteriia,Flavobacteriales
0.025151
0.011580
0.007849
0.018483
0.020208
0.016471
2.570023e-03
3.887158e-03
0.000914
0.008023
0.008751
Bacteria,Caldiserica,,
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.000003
NaN
NaN
Bacteria,Candidatus Saccharibacteria,,
NaN
0.000015
0.000002
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.000004
Bacteria,Chloroflexi,,
0.000115
0.000056
0.000065
0.000197
0.000220
0.000397
4.025800e-04
9.080000e-06
0.000006
0.000016
NaN
Bacteria,Chloroflexi,Dehalococcoidia,
NaN
NaN
NaN
0.000007
NaN
0.000004
NaN
NaN
NaN
NaN
NaN
Bacteria,Chloroflexi,Ktedonobacteria,
NaN
0.000033
0.000002
0.000003
0.000002
0.000024
NaN
1.270000e-06
NaN
NaN
0.000004
Bacteria,Cloacimonetes,,
0.000036
0.000012
0.000011
0.000044
0.000010
0.000013
4.060000e-06
8.430000e-07
0.000008
0.000002
0.000002
Bacteria,Cyanobacteria,,
0.004914
0.003402
0.003956
0.004236
0.002241
0.002439
4.328273e-03
2.204816e-03
0.010128
0.001960
0.001999
Bacteria,Deinococcus-Thermus,Deinococci,
NaN
NaN
NaN
NaN
NaN
0.000004
NaN
NaN
NaN
NaN
NaN
Bacteria,Fibrobacteres,,
0.000038
0.000318
0.000153
0.000002
0.000002
0.000005
1.320000e-06
1.130000e-06
NaN
NaN
NaN
Bacteria,Firmicutes,,
0.000091
0.000038
0.000009
0.000018
NaN
0.000033
1.184790e-04
NaN
NaN
NaN
NaN
Bacteria,Firmicutes,Bacilli,
0.000046
NaN
NaN
NaN
0.000003
0.000008
NaN
NaN
NaN
NaN
NaN
Bacteria,Firmicutes,Clostridia,
NaN
0.000008
NaN
0.000007
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Bacteria,Gemmatimonadetes,Gemmatimonadetes,
0.000166
0.000040
0.000013
0.000003
0.000011
0.000108
6.660000e-06
3.190000e-05
0.000004
0.000005
0.000007
Bacteria,Lentisphaerae,Lentisphaeria,
NaN
0.000002
0.000004
NaN
NaN
0.000039
7.630000e-07
6.710000e-06
NaN
NaN
NaN
Bacteria,Planctomycetes,,
NaN
0.000003
NaN
0.000004
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Bacteria,Poribacteria,,
NaN
0.000012
0.000004
0.000002
NaN
0.000010
1.220000e-06
5.660000e-06
NaN
NaN
NaN
Bacteria,Proteobacteria,,
0.032780
0.028324
0.018497
0.013872
0.015485
0.022215
1.293546e-02
1.074562e-02
0.008219
0.015943
0.012295
Bacteria,Proteobacteria,Alphaproteobacteria,
0.000294
0.000769
0.000165
0.000214
0.000414
0.001195
3.169100e-04
3.365720e-04
0.000560
0.000051
0.000083
Bacteria,Proteobacteria,Betaproteobacteria,
0.008793
0.008111
0.004763
0.004674
0.004833
0.007210
2.251889e-03
1.833776e-03
0.002631
0.002221
0.003799
Bacteria,Proteobacteria,Betaproteobacteria,Burkholderiales
0.171132
0.063464
0.042470
0.034007
0.077809
0.145454
4.249707e-02
6.414466e-02
0.086812
0.196686
0.128910
Bacteria,Proteobacteria,Betaproteobacteria,Methylophilales
0.315817
0.127372
0.099957
0.086968
0.230123
0.287983
4.282137e-02
3.464920e-02
0.075034
0.048542
0.232417
Bacteria,Proteobacteria,Deltaproteobacteria,
0.001556
0.001159
0.000453
0.000456
0.000162
0.000180
2.875930e-04
4.090000e-05
0.000030
0.000015
0.000012
Bacteria,Proteobacteria,Deltaproteobacteria,Bdellovibrionales
0.037076
0.000303
0.000063
0.000017
0.000071
0.000122
1.020000e-05
7.700000e-06
0.000015
0.000005
0.000003
Bacteria,Proteobacteria,Epsilonproteobacteria,
0.000244
0.000020
0.000004
0.000010
0.000005
0.000006
1.820000e-06
2.600000e-06
NaN
NaN
NaN
Bacteria,Proteobacteria,Gammaproteobacteria,
0.021199
0.020717
0.014458
0.012564
0.015710
0.015826
1.117357e-02
1.040339e-02
0.005382
0.009518
0.007299
Bacteria,Proteobacteria,Gammaproteobacteria,Methylococcales
0.236897
0.609278
0.727788
0.688982
0.539669
0.350339
7.588750e-01
7.480671e-01
0.733405
0.580924
0.443425
Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales
0.007148
0.005491
0.004193
0.004814
0.004435
0.005909
4.372023e-03
6.138208e-02
0.012814
0.010268
0.008739
Bacteria,Synergistetes,,
NaN
NaN
0.000003
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Bacteria,Tenericutes,Mollicutes,
NaN
0.000003
NaN
NaN
0.000003
NaN
NaN
NaN
NaN
NaN
0.000002
Bacteria,Thermotogae,,
NaN
0.000004
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Bacteria,Verrucomicrobia,,
0.000036
0.000225
0.000009
NaN
NaN
0.000026
NaN
3.490000e-05
0.000004
NaN
0.000004
Bacteria,Verrucomicrobia,Opitutae,
0.000015
0.000234
0.000012
0.000035
0.000018
0.000049
1.768400e-04
1.078852e-03
0.000019
0.000016
0.000010
Eukaryota,Mollusca,,
0.000020
0.000022
0.000021
0.000022
0.000202
0.000006
2.455560e-04
1.620000e-05
0.000002
0.000006
0.000003
unassigned,,,
0.013689
0.031732
0.021671
0.040855
0.029580
0.038222
5.176488e-02
2.706224e-02
0.027906
0.098949
0.116118
In [28]:
# Column names of the ('High', 4) experiment frame. Keyed lookup replaces
# bae.values()[0], which depends on dict ordering and fails on Python 3.
bae[('High', 4)].columns
Out[28]:
Index([u'index', u'oxygen', u'replicate', u'week', u'kingdom', u'phylum',
u'class', u'order', u'abundance'],
dtype='object')
In [29]:
# Same pivot without the extra reset_index(); the result appears to match the
# previous pivot, so the reset is presumably unnecessary — confirm.
# Keyed lookup replaces bae.values()[0] (order-dependent, not valid on Python 3).
bacteriopop_utils.pivot_for_abundance_matrix(bae[('High', 4)])
Out[29]:
week
4
5
6
7
8
9
10
11
12
13
14
phylo_concat
Archaea,,,
NaN
0.000018
0.000004
NaN
NaN
0.000094
NaN
1.200000e-06
NaN
NaN
NaN
Archaea,Crenarchaeota,,
NaN
0.000008
NaN
0.000002
NaN
0.000012
NaN
NaN
NaN
NaN
NaN
Archaea,Crenarchaeota,Thermoprotei,
NaN
0.000007
NaN
NaN
NaN
0.000030
NaN
NaN
NaN
NaN
NaN
Archaea,Euryarchaeota,,
0.000012
0.000019
0.000006
0.000005
0.000005
0.000138
3.450000e-06
1.200000e-06
NaN
0.000003
NaN
Archaea,Euryarchaeota,Halobacteria,
NaN
NaN
NaN
NaN
NaN
0.000004
NaN
NaN
NaN
NaN
NaN
Archaea,Euryarchaeota,Methanomicrobia,
NaN
0.000015
0.000003
NaN
0.000002
0.000014
6.540000e-07
1.540000e-06
NaN
NaN
0.000003
Archaea,Thaumarchaeota,,
0.000010
0.000023
NaN
NaN
NaN
0.000047
NaN
NaN
NaN
0.000002
0.000002
Bacteria,,,
0.023130
0.013129
0.007104
0.008717
0.008218
0.012571
5.797073e-03
4.896957e-03
0.004307
0.003573
0.002712
Bacteria,Acidobacteria,,
0.000063
0.000050
0.000010
0.000023
0.000070
0.000114
1.220000e-05
3.030000e-05
0.000005
0.000004
0.000004
Bacteria,Actinobacteria,,
0.000004
0.000006
NaN
0.000003
0.000002
0.000028
NaN
NaN
0.000002
0.000002
0.000002
Bacteria,Actinobacteria,Actinobacteria,
0.000005
0.000023
0.000002
0.000018
0.000016
0.000215
NaN
NaN
0.000004
NaN
NaN
Bacteria,Actinobacteria,Actinobacteria,Actinomycetales
0.002334
0.004005
0.002102
0.001994
0.001931
0.021981
2.202130e-03
2.250969e-03
0.002309
0.000522
0.000461
Bacteria,Aquificae,Aquificae,
0.000008
0.000010
0.000002
0.000003
NaN
0.000008
NaN
NaN
NaN
NaN
NaN
Bacteria,Armatimonadetes,,
0.000017
0.000016
0.000014
0.000010
0.000007
0.000056
1.910000e-06
1.240000e-05
0.000002
0.000004
NaN
Bacteria,Bacteroidetes,,
0.009136
0.000353
0.000632
0.003372
0.001292
0.002262
3.618330e-04
1.255900e-04
0.000395
0.000176
0.000289
Bacteria,Bacteroidetes,Cytophagia,
0.000023
0.000021
0.000008
0.000003
0.000004
0.000057
1.840000e-06
6.030000e-06
0.000002
NaN
NaN
Bacteria,Bacteroidetes,Flavobacteriia,
0.000027
0.000003
0.000003
0.000011
0.000004
0.000004
1.730000e-06
2.010000e-06
NaN
NaN
0.000002
Bacteria,Bacteroidetes,Flavobacteriia,Flavobacteriales
0.025151
0.011580
0.007849
0.018483
0.020208
0.016471
2.570023e-03
3.887158e-03
0.000914
0.008023
0.008751
Bacteria,Caldiserica,,
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.000003
NaN
NaN
Bacteria,Candidatus Saccharibacteria,,
NaN
0.000015
0.000002
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.000004
Bacteria,Chloroflexi,,
0.000115
0.000056
0.000065
0.000197
0.000220
0.000397
4.025800e-04
9.080000e-06
0.000006
0.000016
NaN
Bacteria,Chloroflexi,Dehalococcoidia,
NaN
NaN
NaN
0.000007
NaN
0.000004
NaN
NaN
NaN
NaN
NaN
Bacteria,Chloroflexi,Ktedonobacteria,
NaN
0.000033
0.000002
0.000003
0.000002
0.000024
NaN
1.270000e-06
NaN
NaN
0.000004
Bacteria,Cloacimonetes,,
0.000036
0.000012
0.000011
0.000044
0.000010
0.000013
4.060000e-06
8.430000e-07
0.000008
0.000002
0.000002
Bacteria,Cyanobacteria,,
0.004914
0.003402
0.003956
0.004236
0.002241
0.002439
4.328273e-03
2.204816e-03
0.010128
0.001960
0.001999
Bacteria,Deinococcus-Thermus,Deinococci,
NaN
NaN
NaN
NaN
NaN
0.000004
NaN
NaN
NaN
NaN
NaN
Bacteria,Fibrobacteres,,
0.000038
0.000318
0.000153
0.000002
0.000002
0.000005
1.320000e-06
1.130000e-06
NaN
NaN
NaN
Bacteria,Firmicutes,,
0.000091
0.000038
0.000009
0.000018
NaN
0.000033
1.184790e-04
NaN
NaN
NaN
NaN
Bacteria,Firmicutes,Bacilli,
0.000046
NaN
NaN
NaN
0.000003
0.000008
NaN
NaN
NaN
NaN
NaN
Bacteria,Firmicutes,Clostridia,
NaN
0.000008
NaN
0.000007
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Bacteria,Gemmatimonadetes,Gemmatimonadetes,
0.000166
0.000040
0.000013
0.000003
0.000011
0.000108
6.660000e-06
3.190000e-05
0.000004
0.000005
0.000007
Bacteria,Lentisphaerae,Lentisphaeria,
NaN
0.000002
0.000004
NaN
NaN
0.000039
7.630000e-07
6.710000e-06
NaN
NaN
NaN
Bacteria,Planctomycetes,,
NaN
0.000003
NaN
0.000004
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Bacteria,Poribacteria,,
NaN
0.000012
0.000004
0.000002
NaN
0.000010
1.220000e-06
5.660000e-06
NaN
NaN
NaN
Bacteria,Proteobacteria,,
0.032780
0.028324
0.018497
0.013872
0.015485
0.022215
1.293546e-02
1.074562e-02
0.008219
0.015943
0.012295
Bacteria,Proteobacteria,Alphaproteobacteria,
0.000294
0.000769
0.000165
0.000214
0.000414
0.001195
3.169100e-04
3.365720e-04
0.000560
0.000051
0.000083
Bacteria,Proteobacteria,Betaproteobacteria,
0.008793
0.008111
0.004763
0.004674
0.004833
0.007210
2.251889e-03
1.833776e-03
0.002631
0.002221
0.003799
Bacteria,Proteobacteria,Betaproteobacteria,Burkholderiales
0.171132
0.063464
0.042470
0.034007
0.077809
0.145454
4.249707e-02
6.414466e-02
0.086812
0.196686
0.128910
Bacteria,Proteobacteria,Betaproteobacteria,Methylophilales
0.315817
0.127372
0.099957
0.086968
0.230123
0.287983
4.282137e-02
3.464920e-02
0.075034
0.048542
0.232417
Bacteria,Proteobacteria,Deltaproteobacteria,
0.001556
0.001159
0.000453
0.000456
0.000162
0.000180
2.875930e-04
4.090000e-05
0.000030
0.000015
0.000012
Bacteria,Proteobacteria,Deltaproteobacteria,Bdellovibrionales
0.037076
0.000303
0.000063
0.000017
0.000071
0.000122
1.020000e-05
7.700000e-06
0.000015
0.000005
0.000003
Bacteria,Proteobacteria,Epsilonproteobacteria,
0.000244
0.000020
0.000004
0.000010
0.000005
0.000006
1.820000e-06
2.600000e-06
NaN
NaN
NaN
Bacteria,Proteobacteria,Gammaproteobacteria,
0.021199
0.020717
0.014458
0.012564
0.015710
0.015826
1.117357e-02
1.040339e-02
0.005382
0.009518
0.007299
Bacteria,Proteobacteria,Gammaproteobacteria,Methylococcales
0.236897
0.609278
0.727788
0.688982
0.539669
0.350339
7.588750e-01
7.480671e-01
0.733405
0.580924
0.443425
Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales
0.007148
0.005491
0.004193
0.004814
0.004435
0.005909
4.372023e-03
6.138208e-02
0.012814
0.010268
0.008739
Bacteria,Synergistetes,,
NaN
NaN
0.000003
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Bacteria,Tenericutes,Mollicutes,
NaN
0.000003
NaN
NaN
0.000003
NaN
NaN
NaN
NaN
NaN
0.000002
Bacteria,Thermotogae,,
NaN
0.000004
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Bacteria,Verrucomicrobia,,
0.000036
0.000225
0.000009
NaN
NaN
0.000026
NaN
3.490000e-05
0.000004
NaN
0.000004
Bacteria,Verrucomicrobia,Opitutae,
0.000015
0.000234
0.000012
0.000035
0.000018
0.000049
1.768400e-04
1.078852e-03
0.000019
0.000016
0.000010
Eukaryota,Mollusca,,
0.000020
0.000022
0.000021
0.000022
0.000202
0.000006
2.455560e-04
1.620000e-05
0.000002
0.000006
0.000003
unassigned,,,
0.013689
0.031732
0.021671
0.040855
0.029580
0.038222
5.176488e-02
2.706224e-02
0.027906
0.098949
0.116118
In [30]:
import pandas as pd

# Toy experiment: check that a dict's values can be swapped for DataFrames
# while looping over its entries.
toy = {1: 'a', 2: 'b', 3: 'c'}
# Iterate over a snapshot of the items so that reassigning toy[key] inside the
# loop never touches the object being iterated.
for key, value in list(toy.items()):
    # %-formatting prints "key value" identically on Python 2 and Python 3.
    print("%s %s" % (key, value))
    toy[key] = pd.DataFrame([{'z': 1, 'abc': 'd'}, {'z': 3, 'abc': 'dx'}])
# Every entry should now hold its own two-row DataFrame.
for frame in toy.values():
    print(frame)
1 a
2 b
3 c
abc z
0 d 1
1 dx 3
abc z
0 d 1
1 dx 3
abc z
0 d 1
1 dx 3
In [31]:
# Build the matrices that feed the DMD step. The log printed by this call shows
# the result is a dict keyed by (oxygen, replicate) tuples, and each value prints
# as a table with a `phylo_concat` index and one column per week.
# NOTE(review): the exact meaning of min_abundance is defined inside
# bacteriopop_utils.prepare_DMD_matrices -- confirm there before changing it.
prepared_DMD_matrices = bacteriopop_utils.prepare_DMD_matrices(
    min_abundance=0.01,
    phylo_column='order',
    oxygen='all',
)
all groups of oxygen/week/replicate have abundances that sum to 1
columns after aggregating on phylo level: Index([u'oxygen', u'replicate', u'week', u'kingdom', u'phylum', u'class',
u'order', u'abundance'],
dtype='object')
first (up to) 5 phylo columns to
keep: ['' 'Flavobacteriales' 'Rhodobacterales' 'Burkholderiales'
'Methylophilales']
('High', 1)
('High', 2)
('High', 3)
('High', 4)
('Low', 1)
('Low', 2)
('Low', 3)
('Low', 4)
dictionary keys: [('High', 4), ('Low', 1), ('High', 3), ('Low', 2), ('High', 2), ('Low', 3), ('High', 1), ('Low', 4)]
dataframe_dict.keys(): [('High', 4), ('Low', 1), ('High', 3), ('Low', 2), ('High', 2), ('Low', 3), ('High', 1), ('Low', 4)]
week 4 5 6 7 \
phylo_concat
Archaea,,, 0.000000 0.000018 0.000004 0.000000
Archaea,Crenarchaeota,, 0.000000 0.000008 0.000000 0.000002
Archaea,Crenarchaeota,Thermoprotei, 0.000000 0.000007 0.000000 0.000000
Archaea,Euryarchaeota,, 0.000012 0.000019 0.000006 0.000005
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000000 0.000000 0.000000
week 8 9 10 11 \
phylo_concat
Archaea,,, 0.000000 0.000094 0.000000 0.000001
Archaea,Crenarchaeota,, 0.000000 0.000012 0.000000 0.000000
Archaea,Crenarchaeota,Thermoprotei, 0.000000 0.000030 0.000000 0.000000
Archaea,Euryarchaeota,, 0.000005 0.000138 0.000003 0.000001
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000004 0.000000 0.000000
week 12 13 14
phylo_concat
Archaea,,, 0 0.000000 0
Archaea,Crenarchaeota,, 0 0.000000 0
Archaea,Crenarchaeota,Thermoprotei, 0 0.000000 0
Archaea,Euryarchaeota,, 0 0.000003 0
Archaea,Euryarchaeota,Halobacteria, 0 0.000000 0
week 4 5 6 \
phylo_concat
Archaea,,, 0.000000 0.000014 0.000024
Archaea,Crenarchaeota,, 0.000000 0.000007 0.000000
Archaea,Euryarchaeota,, 0.000034 0.000060 0.000061
Archaea,Euryarchaeota,Halobacteria, 0.000012 0.000000 0.000000
Archaea,Euryarchaeota,Methanomicrobia, 0.000000 0.000000 0.000033
week 7 8 9 \
phylo_concat
Archaea,,, 0.000000 0.000005 0.000000
Archaea,Crenarchaeota,, 0.000000 0.000003 0.000000
Archaea,Euryarchaeota,, 0.000044 0.000036 0.000025
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000000 0.000000
Archaea,Euryarchaeota,Methanomicrobia, 0.000036 0.000005 0.000000
week 10 11 12 13 \
phylo_concat
Archaea,,, 0.000000 0 0.000000 0.000000
Archaea,Crenarchaeota,, 0.000000 0 0.000000 0.000000
Archaea,Euryarchaeota,, 0.000046 0 0.000010 0.000002
Archaea,Euryarchaeota,Halobacteria, 0.000000 0 0.000000 0.000000
Archaea,Euryarchaeota,Methanomicrobia, 0.000000 0 0.000002 0.000000
week 14
phylo_concat
Archaea,,, 0.000000
Archaea,Crenarchaeota,, 0.000000
Archaea,Euryarchaeota,, 0.000003
Archaea,Euryarchaeota,Halobacteria, 0.000000
Archaea,Euryarchaeota,Methanomicrobia, 0.000000
week 4 5 6 \
phylo_concat
Archaea,,, 0.000121 0.000000 0.000005
Archaea,Crenarchaeota,, 0.000018 0.000000 0.000000
Archaea,Crenarchaeota,Thermoprotei, 0.000028 0.000004 0.000002
Archaea,Euryarchaeota,, 0.000124 0.000000 0.000005
Archaea,Euryarchaeota,Halobacteria, 0.000012 0.000000 0.000000
week 7 8 9 \
phylo_concat
Archaea,,, 0.000000e+00 0.000001 0.000008
Archaea,Crenarchaeota,, 0.000000e+00 0.000002 0.000002
Archaea,Crenarchaeota,Thermoprotei, 8.780000e-07 0.000000 0.000005
Archaea,Euryarchaeota,, 1.120000e-06 0.000006 0.000013
Archaea,Euryarchaeota,Halobacteria, 0.000000e+00 0.000000 0.000000
week 10 11 12 13 14
phylo_concat
Archaea,,, 0.000000 0 0 0.000002 0
Archaea,Crenarchaeota,, 0.000000 0 0 0.000000 0
Archaea,Crenarchaeota,Thermoprotei, 0.000000 0 0 0.000000 0
Archaea,Euryarchaeota,, 0.000002 0 0 0.000000 0
Archaea,Euryarchaeota,Halobacteria, 0.000000 0 0 0.000000 0
week 4 5 6 \
phylo_concat
Archaea,,, 0.000000 0.000000 0.000000
Archaea,Crenarchaeota,, 0.000000 0.000000 0.000000
Archaea,Euryarchaeota,, 0.000026 0.000016 0.000044
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000000 0.000000
Archaea,Euryarchaeota,Methanomicrobia, 0.000000 0.000013 0.000040
week 7 8 9 \
phylo_concat
Archaea,,, 0.000014 0.000002 0.000081
Archaea,Crenarchaeota,, 0.000003 0.000000 0.000000
Archaea,Euryarchaeota,, 0.000025 0.000015 0.000037
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000000 0.000000
Archaea,Euryarchaeota,Methanomicrobia, 0.000002 0.000000 0.000000
week 10 11 12 13 14
phylo_concat
Archaea,,, 0.000000 0.000002 0.000000 0 0
Archaea,Crenarchaeota,, 0.000000 0.000000 0.000000 0 0
Archaea,Euryarchaeota,, 0.000033 0.000002 0.000001 0 0
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000000 0.000002 0 0
Archaea,Euryarchaeota,Methanomicrobia, 0.000014 0.000000 0.000000 0 0
week 4 5 6 7 \
phylo_concat
Archaea,,, 0.000015 0.000000 0.000000 0.000010
Archaea,Crenarchaeota,, 0.000005 0.000000 0.000000 0.000002
Archaea,Crenarchaeota,Thermoprotei, 0.000000 0.000000 0.000000 0.000005
Archaea,Euryarchaeota,, 0.000016 0.000007 0.000009 0.000022
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000000 0.000000 0.000020
week 8 9 10 11 \
phylo_concat
Archaea,,, 0.00000 0.000022 0.000001 0.000028
Archaea,Crenarchaeota,, 0.00000 0.000007 0.000000 0.000000
Archaea,Crenarchaeota,Thermoprotei, 0.00000 0.000000 0.000000 0.000000
Archaea,Euryarchaeota,, 0.00002 0.000023 0.000002 0.000034
Archaea,Euryarchaeota,Halobacteria, 0.00000 0.000000 0.000000 0.000003
week 12 13 14
phylo_concat
Archaea,,, 0 0.000000e+00 0.000000
Archaea,Crenarchaeota,, 0 0.000000e+00 0.000000
Archaea,Crenarchaeota,Thermoprotei, 0 0.000000e+00 0.000000
Archaea,Euryarchaeota,, 0 0.000000e+00 0.000051
Archaea,Euryarchaeota,Halobacteria, 0 6.150000e-07 0.000000
week 4 5 6 \
phylo_concat
Archaea,,, 0.000007 0.000000 0.000012
Archaea,Crenarchaeota,, 0.000000 0.000000 0.000000
Archaea,Euryarchaeota,, 0.000000 0.000028 0.000048
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000000 0.000000
Archaea,Euryarchaeota,Methanomicrobia, 0.000000 0.000000 0.000028
week 7 8 9 \
phylo_concat
Archaea,,, 0.000000 0.000000 0.000000e+00
Archaea,Crenarchaeota,, 0.000000 0.000000 7.640000e-07
Archaea,Euryarchaeota,, 0.000005 0.000010 7.660000e-07
Archaea,Euryarchaeota,Halobacteria, 0.000004 0.000000 1.100000e-06
Archaea,Euryarchaeota,Methanomicrobia, 0.000000 0.000002 2.000000e-06
week 10 11 12 13 \
phylo_concat
Archaea,,, 0.000000 0.000000 0 0
Archaea,Crenarchaeota,, 0.000000 0.000000 0 0
Archaea,Euryarchaeota,, 0.000001 0.000001 0 0
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000000 0 0
Archaea,Euryarchaeota,Methanomicrobia, 0.000000 0.000000 0 0
week 14
phylo_concat
Archaea,,, 0.000000e+00
Archaea,Crenarchaeota,, 0.000000e+00
Archaea,Euryarchaeota,, 5.020000e-07
Archaea,Euryarchaeota,Halobacteria, 0.000000e+00
Archaea,Euryarchaeota,Methanomicrobia, 0.000000e+00
week 4 5 6 7 \
phylo_concat
Archaea,,, 0.000028 0.000511 0.000136 0.000000
Archaea,Crenarchaeota,, 0.000000 0.000045 0.000006 0.000000
Archaea,Crenarchaeota,Thermoprotei, 0.000026 0.000076 0.000058 0.000009
Archaea,Euryarchaeota,, 0.000032 0.000451 0.000211 0.000004
Archaea,Euryarchaeota,Halobacteria, 0.000029 0.000018 0.000008 0.000000
week 8 9 10 11 \
phylo_concat
Archaea,,, 0.000134 0.000009 0 0.000000
Archaea,Crenarchaeota,, 0.000023 0.000000 0 0.000008
Archaea,Crenarchaeota,Thermoprotei, 0.000011 0.000006 0 0.000000
Archaea,Euryarchaeota,, 0.000192 0.000008 0 0.000005
Archaea,Euryarchaeota,Halobacteria, 0.000019 0.000000 0 0.000000
week 12 13 14
phylo_concat
Archaea,,, 0.000000 0.000000 0.000000
Archaea,Crenarchaeota,, 0.000000 0.000000 0.000000
Archaea,Crenarchaeota,Thermoprotei, 0.000000 0.000000 0.000000
Archaea,Euryarchaeota,, 0.000005 0.000013 0.000008
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000000 0.000000
week 4 5 6 7 \
phylo_concat
Archaea,,, 0.000000 0.00000 0.000027 0.000005
Archaea,Euryarchaeota,, 0.000058 0.00001 0.000081 0.000010
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.00000 0.000005 0.000002
Archaea,Euryarchaeota,Methanomicrobia, 0.000000 0.00000 0.000063 0.000008
Archaea,Thaumarchaeota,, 0.000000 0.00000 0.000002 0.000004
week 8 9 10 \
phylo_concat
Archaea,,, 0.000000 0.000002 0.000000
Archaea,Euryarchaeota,, 0.000032 0.000022 0.000004
Archaea,Euryarchaeota,Halobacteria, 0.000002 0.000000 0.000000
Archaea,Euryarchaeota,Methanomicrobia, 0.000004 0.000005 0.000000
Archaea,Thaumarchaeota,, 0.000000 0.000000 0.000000
week 11 12 13 \
phylo_concat
Archaea,,, 0.000003 0.000000 0.000000
Archaea,Euryarchaeota,, 0.000006 0.000002 0.000001
Archaea,Euryarchaeota,Halobacteria, 0.000000 0.000000 0.000000
Archaea,Euryarchaeota,Methanomicrobia, 0.000000 0.000000 0.000000
Archaea,Thaumarchaeota,, 0.000000 0.000002 0.000000
week 14
phylo_concat
Archaea,,, 0.000000e+00
Archaea,Euryarchaeota,, 9.430000e-07
Archaea,Euryarchaeota,Halobacteria, 0.000000e+00
Archaea,Euryarchaeota,Methanomicrobia, 0.000000e+00
Archaea,Thaumarchaeota,, 1.170000e-06
In [32]:
# Progress marker: reaching this cell means the matrix preparation above ran
# to completion. Function-call form prints the same text on Python 2 and 3.
print("woo hoo!")
woo hoo!
In [33]:
# Peek at the first few rows of one prepared matrix. Wrapping the values in
# list(...) keeps them indexable on Python 3, where dict.values() returns a
# view that cannot be subscripted.
# NOTE(review): which (oxygen, replicate) entry comes first depends on the
# dictionary's ordering; index by an explicit key if a specific sample is meant.
list(prepared_DMD_matrices.values())[0].head()
Out[33]:
week
4
5
6
7
8
9
10
11
12
13
14
phylo_concat
Archaea,,,
0.000000
0.000018
0.000004
0.000000
0.000000
0.000094
0.000000
0.000001
0
0.000000
0
Archaea,Crenarchaeota,,
0.000000
0.000008
0.000000
0.000002
0.000000
0.000012
0.000000
0.000000
0
0.000000
0
Archaea,Crenarchaeota,Thermoprotei,
0.000000
0.000007
0.000000
0.000000
0.000000
0.000030
0.000000
0.000000
0
0.000000
0
Archaea,Euryarchaeota,,
0.000012
0.000019
0.000006
0.000005
0.000005
0.000138
0.000003
0.000001
0
0.000003
0
Archaea,Euryarchaeota,Halobacteria,
0.000000
0.000000
0.000000
0.000000
0.000000
0.000004
0.000000
0.000000
0
0.000000
0
In [34]:
# Replace missing abundances with 0 in the first matrix of the dictionary.
# The fill is done in place on the stored DataFrame, so the dictionary entry
# itself is updated. list(...) keeps the values indexable on Python 3 too.
list(prepared_DMD_matrices.values())[0].fillna(0, inplace=True)
In [35]:
# Display the first matrix of the dictionary in full to inspect its contents.
# list(...) keeps the values indexable on Python 3, where dict.values() is a view.
list(prepared_DMD_matrices.values())[0]
Out[35]:
week
4
5
6
7
8
9
10
11
12
13
14
phylo_concat
Archaea,,,
0.000000
0.000018
0.000004
0.000000
0.000000
0.000094
0.000000e+00
1.200000e-06
0.000000
0.000000
0.000000
Archaea,Crenarchaeota,,
0.000000
0.000008
0.000000
0.000002
0.000000
0.000012
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000000
Archaea,Crenarchaeota,Thermoprotei,
0.000000
0.000007
0.000000
0.000000
0.000000
0.000030
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000000
Archaea,Euryarchaeota,,
0.000012
0.000019
0.000006
0.000005
0.000005
0.000138
3.450000e-06
1.200000e-06
0.000000
0.000003
0.000000
Archaea,Euryarchaeota,Halobacteria,
0.000000
0.000000
0.000000
0.000000
0.000000
0.000004
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000000
Archaea,Euryarchaeota,Methanomicrobia,
0.000000
0.000015
0.000003
0.000000
0.000002
0.000014
6.540000e-07
1.540000e-06
0.000000
0.000000
0.000003
Archaea,Thaumarchaeota,,
0.000010
0.000023
0.000000
0.000000
0.000000
0.000047
0.000000e+00
0.000000e+00
0.000000
0.000002
0.000002
Bacteria,,,
0.023130
0.013129
0.007104
0.008717
0.008218
0.012571
5.797073e-03
4.896957e-03
0.004307
0.003573
0.002712
Bacteria,Acidobacteria,,
0.000063
0.000050
0.000010
0.000023
0.000070
0.000114
1.220000e-05
3.030000e-05
0.000005
0.000004
0.000004
Bacteria,Actinobacteria,,
0.000004
0.000006
0.000000
0.000003
0.000002
0.000028
0.000000e+00
0.000000e+00
0.000002
0.000002
0.000002
Bacteria,Actinobacteria,Actinobacteria,
0.000005
0.000023
0.000002
0.000018
0.000016
0.000215
0.000000e+00
0.000000e+00
0.000004
0.000000
0.000000
Bacteria,Actinobacteria,Actinobacteria,Actinomycetales
0.002334
0.004005
0.002102
0.001994
0.001931
0.021981
2.202130e-03
2.250969e-03
0.002309
0.000522
0.000461
Bacteria,Aquificae,Aquificae,
0.000008
0.000010
0.000002
0.000003
0.000000
0.000008
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000000
Bacteria,Armatimonadetes,,
0.000017
0.000016
0.000014
0.000010
0.000007
0.000056
1.910000e-06
1.240000e-05
0.000002
0.000004
0.000000
Bacteria,Bacteroidetes,,
0.009136
0.000353
0.000632
0.003372
0.001292
0.002262
3.618330e-04
1.255900e-04
0.000395
0.000176
0.000289
Bacteria,Bacteroidetes,Cytophagia,
0.000023
0.000021
0.000008
0.000003
0.000004
0.000057
1.840000e-06
6.030000e-06
0.000002
0.000000
0.000000
Bacteria,Bacteroidetes,Cytophagia,Cytophagales
0.011238
0.001037
0.001847
0.006268
0.001422
0.002344
9.252900e-04
1.550500e-04
0.000171
0.000333
0.000652
Bacteria,Bacteroidetes,Flavobacteriia,
0.000027
0.000003
0.000003
0.000011
0.000004
0.000004
1.730000e-06
2.010000e-06
0.000000
0.000000
0.000002
Bacteria,Bacteroidetes,Flavobacteriia,Flavobacteriales
0.025151
0.011580
0.007849
0.018483
0.020208
0.016471
2.570023e-03
3.887158e-03
0.000914
0.008023
0.008751
Bacteria,Bacteroidetes,Sphingobacteriia,Sphingobacteriales
0.008508
0.000739
0.001565
0.011688
0.003022
0.003422
1.839286e-03
1.196624e-03
0.002869
0.002017
0.005323
Bacteria,Caldiserica,,
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000e+00
0.000000e+00
0.000003
0.000000
0.000000
Bacteria,Candidatus Saccharibacteria,,
0.000000
0.000015
0.000002
0.000000
0.000000
0.000000
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000004
Bacteria,Chloroflexi,,
0.000115
0.000056
0.000065
0.000197
0.000220
0.000397
4.025800e-04
9.080000e-06
0.000006
0.000016
0.000000
Bacteria,Chloroflexi,Dehalococcoidia,
0.000000
0.000000
0.000000
0.000007
0.000000
0.000004
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000000
Bacteria,Chloroflexi,Ktedonobacteria,
0.000000
0.000033
0.000002
0.000003
0.000002
0.000024
0.000000e+00
1.270000e-06
0.000000
0.000000
0.000004
Bacteria,Cloacimonetes,,
0.000036
0.000012
0.000011
0.000044
0.000010
0.000013
4.060000e-06
8.430000e-07
0.000008
0.000002
0.000002
Bacteria,Cyanobacteria,,
0.004914
0.003402
0.003956
0.004236
0.002241
0.002439
4.328273e-03
2.204816e-03
0.010128
0.001960
0.001999
Bacteria,Deinococcus-Thermus,Deinococci,
0.000000
0.000000
0.000000
0.000000
0.000000
0.000004
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000000
Bacteria,Fibrobacteres,,
0.000038
0.000318
0.000153
0.000002
0.000002
0.000005
1.320000e-06
1.130000e-06
0.000000
0.000000
0.000000
Bacteria,Firmicutes,,
0.000091
0.000038
0.000009
0.000018
0.000000
0.000033
1.184790e-04
0.000000e+00
0.000000
0.000000
0.000000
...
...
...
...
...
...
...
...
...
...
...
...
Bacteria,Gemmatimonadetes,Gemmatimonadetes,
0.000166
0.000040
0.000013
0.000003
0.000011
0.000108
6.660000e-06
3.190000e-05
0.000004
0.000005
0.000007
Bacteria,Lentisphaerae,Lentisphaeria,
0.000000
0.000002
0.000004
0.000000
0.000000
0.000039
7.630000e-07
6.710000e-06
0.000000
0.000000
0.000000
Bacteria,Planctomycetes,,
0.000000
0.000003
0.000000
0.000004
0.000000
0.000000
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000000
Bacteria,Poribacteria,,
0.000000
0.000012
0.000004
0.000002
0.000000
0.000010
1.220000e-06
5.660000e-06
0.000000
0.000000
0.000000
Bacteria,Proteobacteria,,
0.032780
0.028324
0.018497
0.013872
0.015485
0.022215
1.293546e-02
1.074562e-02
0.008219
0.015943
0.012295
Bacteria,Proteobacteria,Alphaproteobacteria,
0.000294
0.000769
0.000165
0.000214
0.000414
0.001195
3.169100e-04
3.365720e-04
0.000560
0.000051
0.000083
Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales
0.002586
0.004059
0.002573
0.004111
0.004736
0.007465
8.669128e-03
2.986190e-03
0.004464
0.005136
0.007796
Bacteria,Proteobacteria,Alphaproteobacteria,Rhodobacterales
0.001428
0.000449
0.000196
0.000298
0.000325
0.001484
6.558390e-04
4.476700e-05
0.008172
0.000201
0.000220
Bacteria,Proteobacteria,Alphaproteobacteria,Rhodospirillales
0.001681
0.002379
0.002162
0.005629
0.005148
0.004566
6.330415e-03
1.334734e-03
0.000672
0.002212
0.001414
Bacteria,Proteobacteria,Betaproteobacteria,
0.008793
0.008111
0.004763
0.004674
0.004833
0.007210
2.251889e-03
1.833776e-03
0.002631
0.002221
0.003799
Bacteria,Proteobacteria,Betaproteobacteria,Burkholderiales
0.171132
0.063464
0.042470
0.034007
0.077809
0.145454
4.249707e-02
6.414466e-02
0.086812
0.196686
0.128910
Bacteria,Proteobacteria,Betaproteobacteria,Methylophilales
0.315817
0.127372
0.099957
0.086968
0.230123
0.287983
4.282137e-02
3.464920e-02
0.075034
0.048542
0.232417
Bacteria,Proteobacteria,Betaproteobacteria,Rhodocyclales
0.005453
0.006298
0.004289
0.005846
0.005541
0.006504
4.987410e-03
3.180402e-03
0.002764
0.004198
0.003382
Bacteria,Proteobacteria,Deltaproteobacteria,
0.001556
0.001159
0.000453
0.000456
0.000162
0.000180
2.875930e-04
4.090000e-05
0.000030
0.000015
0.000012
Bacteria,Proteobacteria,Deltaproteobacteria,Bdellovibrionales
0.037076
0.000303
0.000063
0.000017
0.000071
0.000122
1.020000e-05
7.700000e-06
0.000015
0.000005
0.000003
Bacteria,Proteobacteria,Deltaproteobacteria,Desulfobacterales
0.001610
0.001030
0.001250
0.001313
0.000808
0.000903
1.481878e-03
1.656222e-03
0.000087
0.000121
0.000150
Bacteria,Proteobacteria,Deltaproteobacteria,Myxococcales
0.003215
0.015440
0.002386
0.008818
0.001848
0.003819
2.242297e-03
8.454640e-04
0.001608
0.000090
0.000057
Bacteria,Proteobacteria,Epsilonproteobacteria,
0.000244
0.000020
0.000004
0.000010
0.000005
0.000006
1.820000e-06
2.600000e-06
0.000000
0.000000
0.000000
Bacteria,Proteobacteria,Gammaproteobacteria,
0.021199
0.020717
0.014458
0.012564
0.015710
0.015826
1.117357e-02
1.040339e-02
0.005382
0.009518
0.007299
Bacteria,Proteobacteria,Gammaproteobacteria,Chromatiales
0.023851
0.003703
0.002500
0.002844
0.002329
0.001743
2.323267e-03
6.145490e-04
0.000145
0.002304
0.001908
Bacteria,Proteobacteria,Gammaproteobacteria,Methylococcales
0.236897
0.609278
0.727788
0.688982
0.539669
0.350339
7.588750e-01
7.480671e-01
0.733405
0.580924
0.443425
Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales
0.007148
0.005491
0.004193
0.004814
0.004435
0.005909
4.372023e-03
6.138208e-02
0.012814
0.010268
0.008739
Bacteria,Proteobacteria,Gammaproteobacteria,Xanthomonadales
0.001830
0.001639
0.001707
0.002120
0.002788
0.001807
1.894457e-03
1.100022e-03
0.000464
0.000669
0.000710
Bacteria,Synergistetes,,
0.000000
0.000000
0.000003
0.000000
0.000000
0.000000
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000000
Bacteria,Tenericutes,Mollicutes,
0.000000
0.000003
0.000000
0.000000
0.000003
0.000000
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000002
Bacteria,Thermotogae,,
0.000000
0.000004
0.000000
0.000000
0.000000
0.000000
0.000000e+00
0.000000e+00
0.000000
0.000000
0.000000
Bacteria,Verrucomicrobia,,
0.000036
0.000225
0.000009
0.000000
0.000000
0.000026
0.000000e+00
3.490000e-05
0.000004
0.000000
0.000004
Bacteria,Verrucomicrobia,Opitutae,
0.000015
0.000234
0.000012
0.000035
0.000018
0.000049
1.768400e-04
1.078852e-03
0.000019
0.000016
0.000010
Eukaryota,Mollusca,,
0.000020
0.000022
0.000021
0.000022
0.000202
0.000006
2.455560e-04
1.620000e-05
0.000002
0.000006
0.000003
unassigned,,,
0.013689
0.031732
0.021671
0.040855
0.029580
0.038222
5.176488e-02
2.706224e-02
0.027906
0.098949
0.116118
63 rows × 11 columns
In [ ]:
Content source: JanetMatsen/bacteriopop
Similar notebooks: