In [1]:
%run GLOBALS.py


3.5.1 |Continuum Analytics, Inc.| (default, Dec  7 2015, 11:24:55) 
[GCC 4.2.1 (Apple Inc. build 5577)]

In [2]:
import matplotlib as mpl
mpl.use('TkAgg')
import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd
import seaborn as sns

In [3]:
import abundance_plot_utils
To sum up: Could you make separate heatmaps for Burkholderiales, starting with Order and going as fine as genus, whichever classified to genus level, as many as it takes? Same for Bacteroidetes starting with Phylum and going all the way to the genus. I want to see how much diversity we have among these species and whether there are dominant species in each microcosm. Thanks, Mila

In [4]:
# Taxa selection passed as `taxa_dict` to the drill-down heatmaps below:
# key is a taxonomic rank (a column name of the abundance table), value is
# the list of names selected at that rank.
burkhold = {
    'Order': ['Burkholderiales'],
}

In [5]:
list(burkhold.keys())


Out[5]:
['Order']

In [6]:
list(burkhold.values())[0]


Out[6]:
['Burkholderiales']

In [7]:
# Load the reduced abundance table (one row per taxon per sample).
# MAIN_DIR is not defined in this notebook; presumably it is set by the
# `%run GLOBALS.py` cell at the top -- verify before running standalone.
reduced_csv_path = MAIN_DIR + "/results/reduced_data--all_taxonomy_remains.csv"
data_reduced = pd.read_csv(reduced_csv_path)

In [8]:
data_reduced.columns


Out[8]:
Index(['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Length',
       'fraction of reads', 'project', 'ID', 'oxy', 'rep', 'week'],
      dtype='object')

In [9]:
# Taxonomic ranks from broadest to finest, in the same order as the leading
# columns of `data_reduced`. Scratch value: no later cell visible in this
# notebook reads `tmp`.
tmp = [
    'Kingdom',
    'Phylum',
    'Class',
    'Order',
    'Family',
    'Genus',
]

In [10]:
abundance_plot_utils.taxonomy_levels_above(taxa_level='Order')


Out[10]:
['Kingdom', 'Phylum', 'Class']

In [11]:
abundance_plot_utils.taxonomy_levels_below(taxa_level='Order')


Out[11]:
['Family', 'Genus']

In [12]:
abundance_plot_utils.label_from_taxa_colnames('Dog', 'Cat', 'Rat')


Out[12]:
'Dog, Cat, Rat'

In [13]:
abundance_plot_utils.taxa_dict_to_descriptive_string(burkhold)


Order
Out[13]:
'Order-Burkholderiales'

In [14]:
list(burkhold.values())[0][0]


Out[14]:
'Burkholderiales'

In [15]:
# Drill-down heatmap for everything below Order Burkholderiales (labels are
# built from Family and Genus). No low_cutoff is given, so the helper's
# default applies; the saved file name reports it as min_0.001.
abundance_plot_utils.heatmap_all_below(
    dataframe=data_reduced,
    taxa_dict=burkhold,
    main_dir=MAIN_DIR,
    plot_dir='plots/drill_down',
)


     Kingdom          Phylum               Class            Order  \
7   Bacteria  Proteobacteria  Betaproteobacteria  Burkholderiales   
11  Bacteria  Proteobacteria  Betaproteobacteria  Burkholderiales   
16  Bacteria  Proteobacteria  Betaproteobacteria  Burkholderiales   
24  Bacteria  Proteobacteria  Betaproteobacteria  Burkholderiales   
27  Bacteria  Proteobacteria  Betaproteobacteria  Burkholderiales   

              Family         Genus   Length  fraction of reads  project  \
7     Comamonadaceae         other  2023406           0.024610  1056013   
11    Comamonadaceae    Acidovorax  1257730           0.015060  1056013   
16               NaN         other   631062           0.006768  1056013   
24    Comamonadaceae   Ramlibacter   266677           0.003959  1056013   
27  Burkholderiaceae  Burkholderia   270314           0.003196  1056013   

        ID  oxy  rep  week  
7   1_LOW4  Low    1     4  
11  1_LOW4  Low    1     4  
16  1_LOW4  Low    1     4  
24  1_LOW4  Low    1     4  
27  1_LOW4  Low    1     4  
label_cols: ['Family', 'Genus']
//anaconda/envs/elviz/lib/python3.5/site-packages/pandas/core/frame.py:2756: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
../abundance_plot_utils.py:490: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  columns=label_cols), axis=1)
['Comamonadaceae' 'Comamonadaceae, Acidovorax' 'other'
 'Comamonadaceae, Ramlibacter' 'Burkholderiaceae, Burkholderia']
//anaconda/envs/elviz/lib/python3.5/site-packages/matplotlib/figure.py:1744: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "
Order
plots/drill_down/Order-Burkholderiales--min_0.001--x-week.pdf
Out[15]:
<seaborn.axisgrid.FacetGrid at 0x1117bea58>

In [16]:
# Same Burkholderiales drill-down, with low_cutoff raised to 0.01.
# NOTE(review): low_cutoff looks like a minimum "fraction of reads" a taxon
# must reach to be plotted -- confirm against abundance_plot_utils.
abundance_plot_utils.heatmap_all_below(
    dataframe=data_reduced,
    taxa_dict=burkhold,
    main_dir=MAIN_DIR,
    plot_dir='plots/drill_down',
    low_cutoff=0.01,
)


     Kingdom          Phylum               Class            Order  \
7   Bacteria  Proteobacteria  Betaproteobacteria  Burkholderiales   
11  Bacteria  Proteobacteria  Betaproteobacteria  Burkholderiales   
16  Bacteria  Proteobacteria  Betaproteobacteria  Burkholderiales   
24  Bacteria  Proteobacteria  Betaproteobacteria  Burkholderiales   
27  Bacteria  Proteobacteria  Betaproteobacteria  Burkholderiales   

              Family         Genus   Length  fraction of reads  project  \
7     Comamonadaceae         other  2023406           0.024610  1056013   
11    Comamonadaceae    Acidovorax  1257730           0.015060  1056013   
16               NaN         other   631062           0.006768  1056013   
24    Comamonadaceae   Ramlibacter   266677           0.003959  1056013   
27  Burkholderiaceae  Burkholderia   270314           0.003196  1056013   

        ID  oxy  rep  week  
7   1_LOW4  Low    1     4  
11  1_LOW4  Low    1     4  
16  1_LOW4  Low    1     4  
24  1_LOW4  Low    1     4  
27  1_LOW4  Low    1     4  
label_cols: ['Family', 'Genus']
//anaconda/envs/elviz/lib/python3.5/site-packages/pandas/core/frame.py:2756: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
../abundance_plot_utils.py:490: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  columns=label_cols), axis=1)
['Comamonadaceae' 'Comamonadaceae, Acidovorax' 'other' 'Oxalobacteraceae'
 'Comamonadaceae, Curvibacter']
//anaconda/envs/elviz/lib/python3.5/site-packages/matplotlib/figure.py:1744: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "
Order
plots/drill_down/Order-Burkholderiales--min_0.01--x-week.pdf
Out[16]:
<seaborn.axisgrid.FacetGrid at 0x115d6f4a8>

In [17]:
# Taxa selection for the Bacteroidetes drill-down, starting at the Phylum rank
# (labels are then built from Class, Order, Family and Genus).
bacteroid = {'Phylum': ['Bacteroidetes']}

# main_dir is passed explicitly, as in the Burkholderiales calls above, so the
# PDF is written relative to the same project root instead of relying on the
# helper's default.
# NOTE(review): the default of main_dir is not visible from this notebook --
# confirm it matched MAIN_DIR if previously generated plots must stay in place.
abundance_plot_utils.heatmap_all_below(
    dataframe=data_reduced,
    taxa_dict=bacteroid,
    main_dir=MAIN_DIR,
    plot_dir='plots/drill_down',
)


     Kingdom         Phylum             Class               Order  \
4   Bacteria  Bacteroidetes    Flavobacteriia    Flavobacteriales   
25  Bacteria  Bacteroidetes               NaN                 NaN   
26  Bacteria  Bacteroidetes    Flavobacteriia    Flavobacteriales   
80  Bacteria  Bacteroidetes  Sphingobacteriia  Sphingobacteriales   
91  Bacteria  Bacteroidetes  Sphingobacteriia  Sphingobacteriales   

               Family           Genus   Length  fraction of reads  project  \
4   Flavobacteriaceae  Flavobacterium  4654774           0.065548  1056013   
25                NaN           other   335358           0.003765  1056013   
26  Flavobacteriaceae           other   316010           0.003553  1056013   
80                NaN           other    90803           0.000532  1056013   
91   Chitinophagaceae           other    74820           0.000493  1056013   

        ID  oxy  rep  week  
4   1_LOW4  Low    1     4  
25  1_LOW4  Low    1     4  
26  1_LOW4  Low    1     4  
80  1_LOW4  Low    1     4  
91  1_LOW4  Low    1     4  
label_cols: ['Class', 'Order', 'Family', 'Genus']
//anaconda/envs/elviz/lib/python3.5/site-packages/pandas/core/frame.py:2756: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
../abundance_plot_utils.py:490: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  columns=label_cols), axis=1)
['Flavobacteriia, Flavobacteriales, Flavobacteriaceae, Flavobacterium'
 'other' 'Flavobacteriia, Flavobacteriales, Flavobacteriaceae'
 'Sphingobacteriia, Sphingobacteriales' 'Flavobacteriia, Flavobacteriales']
//anaconda/envs/elviz/lib/python3.5/site-packages/matplotlib/figure.py:1744: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "
Phylum
plots/drill_down/Phylum-Bacteroidetes--min_0.001--x-week.pdf
Out[17]:
<seaborn.axisgrid.FacetGrid at 0x1183a1d68>

In [18]:
# Taxa selection for the Bacteroidetes drill-down; defined here so the cell can
# be run on its own.
bacteroid = {'Phylum': ['Bacteroidetes']}

# Bacteroidetes drill-down with low_cutoff raised to 0.005.
# main_dir is passed explicitly, as in the Burkholderiales calls above, so the
# PDF is written relative to the same project root instead of relying on the
# helper's default.
# NOTE(review): the default of main_dir is not visible from this notebook --
# confirm it matched MAIN_DIR if previously generated plots must stay in place.
abundance_plot_utils.heatmap_all_below(
    dataframe=data_reduced,
    taxa_dict=bacteroid,
    main_dir=MAIN_DIR,
    plot_dir='plots/drill_down',
    low_cutoff=0.005,
)


     Kingdom         Phylum             Class               Order  \
4   Bacteria  Bacteroidetes    Flavobacteriia    Flavobacteriales   
25  Bacteria  Bacteroidetes               NaN                 NaN   
26  Bacteria  Bacteroidetes    Flavobacteriia    Flavobacteriales   
80  Bacteria  Bacteroidetes  Sphingobacteriia  Sphingobacteriales   
91  Bacteria  Bacteroidetes  Sphingobacteriia  Sphingobacteriales   

               Family           Genus   Length  fraction of reads  project  \
4   Flavobacteriaceae  Flavobacterium  4654774           0.065548  1056013   
25                NaN           other   335358           0.003765  1056013   
26  Flavobacteriaceae           other   316010           0.003553  1056013   
80                NaN           other    90803           0.000532  1056013   
91   Chitinophagaceae           other    74820           0.000493  1056013   

        ID  oxy  rep  week  
4   1_LOW4  Low    1     4  
25  1_LOW4  Low    1     4  
26  1_LOW4  Low    1     4  
80  1_LOW4  Low    1     4  
91  1_LOW4  Low    1     4  
label_cols: ['Class', 'Order', 'Family', 'Genus']
//anaconda/envs/elviz/lib/python3.5/site-packages/pandas/core/frame.py:2756: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
../abundance_plot_utils.py:490: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  columns=label_cols), axis=1)
['Flavobacteriia, Flavobacteriales, Flavobacteriaceae, Flavobacterium'
 'Flavobacteriia, Flavobacteriales, Flavobacteriaceae' 'other'
 'Sphingobacteriia, Sphingobacteriales'
 'Flavobacteriia, Flavobacteriales, Cryomorphaceae, Fluviicola']
//anaconda/envs/elviz/lib/python3.5/site-packages/matplotlib/figure.py:1744: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "
Phylum
plots/drill_down/Phylum-Bacteroidetes--min_0.005--x-week.pdf
Out[18]:
<seaborn.axisgrid.FacetGrid at 0x119f51cc0>
x = build_lambda(abundance_plot_utils.label_from_phylo_colnames, ['Family', 'Genus'])
data_reduced.head()
data_reduced.fillna('unknown', inplace=True)
data_reduced.apply(x, axis=1)

In [19]:
import abundance_utils

In [20]:
# Small sample of the abundance table (first five rows) for quick inspection.
toy = data_reduced.head(n=5)

In [21]:
toy


Out[21]:
Kingdom Phylum Class Order Family Genus Length fraction of reads project ID oxy rep week
0 Bacteria Proteobacteria Gammaproteobacteria Methylococcales Methylococcaceae Methylobacter 9948861 0.205558 1056013 1_LOW4 Low 1 4
1 Bacteria Proteobacteria Betaproteobacteria Methylophilales Methylophilaceae Methylotenera 5066955 0.185898 1056013 1_LOW4 Low 1 4
2 Bacteria Proteobacteria NaN NaN NaN other 3930509 0.075027 1056013 1_LOW4 Low 1 4
3 Bacteria Proteobacteria Gammaproteobacteria NaN NaN other 5620690 0.073601 1056013 1_LOW4 Low 1 4
4 Bacteria Bacteroidetes Flavobacteriia Flavobacteriales Flavobacteriaceae Flavobacterium 4654774 0.065548 1056013 1_LOW4 Low 1 4
Janet: Could we now do the same with Myxococcales/Bdellovibrionales? I want to see whether we can correlate predator abundances with shifts in other populations. Possibly even at the family level (Myxococcaceae/Bacteriovoracaceae)? Thanks!

In [22]:
# Predator drill-down, part 1: everything below Order Myxococcales.
# low_cutoff is set to 0 here; presumably this keeps even very rare taxa in
# the plot -- confirm against abundance_plot_utils.
abundance_plot_utils.heatmap_all_below(
    dataframe=data_reduced,
    taxa_dict={'Order': ['Myxococcales']},
    main_dir=MAIN_DIR,
    plot_dir='plots/drill_down',
    low_cutoff=0,
)


     Kingdom          Phylum                Class         Order  \
19  Bacteria  Proteobacteria  Deltaproteobacteria  Myxococcales   
22  Bacteria  Proteobacteria  Deltaproteobacteria  Myxococcales   
23  Bacteria  Proteobacteria  Deltaproteobacteria  Myxococcales   
46  Bacteria  Proteobacteria  Deltaproteobacteria  Myxococcales   
49  Bacteria  Proteobacteria  Deltaproteobacteria  Myxococcales   

           Family       Genus  Length  fraction of reads  project      ID  \
19  Polyangiaceae       other  650466           0.005753  1056013  1_LOW4   
22            NaN       other  522193           0.004840  1056013  1_LOW4   
23  Polyangiaceae   Sorangium  527266           0.004477  1056013  1_LOW4   
46  Myxococcaceae  Myxococcus  122639           0.001113  1056013  1_LOW4   
49   Kofleriaceae  Haliangium  120404           0.001060  1056013  1_LOW4   

    oxy  rep  week  
19  Low    1     4  
22  Low    1     4  
23  Low    1     4  
46  Low    1     4  
49  Low    1     4  
label_cols: ['Family', 'Genus']
//anaconda/envs/elviz/lib/python3.5/site-packages/pandas/core/frame.py:2756: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
../abundance_plot_utils.py:490: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  columns=label_cols), axis=1)
['Polyangiaceae' 'other' 'Polyangiaceae, Sorangium'
 'Myxococcaceae, Myxococcus' 'Kofleriaceae, Haliangium']
//anaconda/envs/elviz/lib/python3.5/site-packages/matplotlib/figure.py:1744: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "
Order
plots/drill_down/Order-Myxococcales--min_0--x-week.pdf
Out[22]:
<seaborn.axisgrid.FacetGrid at 0x119f7e940>

In [23]:
# Predator drill-down, part 2: everything below Order Bdellovibrionales.
# low_cutoff is set to 0 here; presumably this keeps even very rare taxa in
# the plot -- confirm against abundance_plot_utils.
abundance_plot_utils.heatmap_all_below(
    dataframe=data_reduced,
    taxa_dict={'Order': ['Bdellovibrionales']},
    main_dir=MAIN_DIR,
    plot_dir='plots/drill_down',
    low_cutoff=0,
)


       Kingdom          Phylum                Class              Order  \
214   Bacteria  Proteobacteria  Deltaproteobacteria  Bdellovibrionales   
718   Bacteria  Proteobacteria  Deltaproteobacteria  Bdellovibrionales   
831   Bacteria  Proteobacteria  Deltaproteobacteria  Bdellovibrionales   
969   Bacteria  Proteobacteria  Deltaproteobacteria  Bdellovibrionales   
1284  Bacteria  Proteobacteria  Deltaproteobacteria  Bdellovibrionales   

                  Family          Genus  Length  fraction of reads  project  \
214   Bdellovibrionaceae   Bdellovibrio   14841           0.000129  1056013   
718   Bacteriovoracaceae  Bacteriovorax     904           0.000006  1056013   
831   Bacteriovoracaceae  Bacteriovorax  210648           0.001257  1056016   
969   Bdellovibrionaceae   Bdellovibrio   26041           0.000187  1056016   
1284                 NaN          other    5010           0.000027  1056016   

          ID  oxy  rep  week  
214   1_LOW4  Low    1     4  
718   1_LOW4  Low    1     4  
831   2_LOW4  Low    2     4  
969   2_LOW4  Low    2     4  
1284  2_LOW4  Low    2     4  
label_cols: ['Family', 'Genus']
['Bdellovibrionaceae, Bdellovibrio' 'Bacteriovoracaceae, Bacteriovorax'
 'other']
//anaconda/envs/elviz/lib/python3.5/site-packages/pandas/core/frame.py:2756: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
../abundance_plot_utils.py:490: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  columns=label_cols), axis=1)
//anaconda/envs/elviz/lib/python3.5/site-packages/matplotlib/figure.py:1744: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "
Order
plots/drill_down/Order-Bdellovibrionales--min_0--x-week.pdf
Out[23]:
<seaborn.axisgrid.FacetGrid at 0x11cf8f9b0>

In [ ]: