In [1]:
import sys
print(sys.version)

# add path to .py files so they can be loaded.
module_dir = "../"
sys.path.append(module_dir)

# define the plot dir
MAIN_DIR = '../'
PLOT_DIR = './plots'
RAW_DATA_DIR = '../raw_data/'


3.5.1 |Continuum Analytics, Inc.| (default, Dec  7 2015, 11:24:55) 
[GCC 4.2.1 (Apple Inc. build 5577)]

In [2]:
import pandas as pd
import seaborn as sns

In [3]:
%matplotlib inline

import matplotlib.pyplot as plt

In [4]:
import abundance_plot_utils

In [5]:
data_reduced = \
        pd.read_csv(MAIN_DIR +
                    "results/reduced_data--all_taxonomy_remains.csv")

In [6]:
METHYLOCOCCACEAE = {'Genus': ['Methylobacter', 'Methylovulum',
                              'Methylomonas', 'Methylosarcina']}

In [7]:
def prepare_df(taxa_dict, main_dir, dataframe):
    plot_data = abundance_plot_utils.aggregate_mixed_taxonomy(
        dataframe=dataframe, taxa_dict=taxa_dict, main_dir=main_dir)
    return plot_data

prepped_data = prepare_df(taxa_dict = METHYLOCOCCACEAE, main_dir=MAIN_DIR, dataframe=data_reduced)


Index(['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Length',
       'fraction of reads', 'project', 'ID', 'oxy', 'rep', 'week'],
      dtype='object')
Index(['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Length',
       'fraction of reads', 'project', 'ID', 'oxy', 'rep', 'week'],
      dtype='object')
Index(['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Length',
       'fraction of reads', 'project', 'ID', 'oxy', 'rep', 'week'],
      dtype='object')
Index(['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Length',
       'fraction of reads', 'project', 'ID', 'oxy', 'rep', 'week'],
      dtype='object')

In [8]:
prepped_data.head()


Out[8]:
ID abundance sum taxonomic level taxonomic name oxy rep week project
0 100_LOW12 0.488347 Genus Methylobacter Low 4 12 1056214
1 100_LOW12 0.002571 Genus Methylovulum Low 4 12 1056214
2 100_LOW12 0.014736 Genus Methylomonas Low 4 12 1056214
3 100_LOW12 0.004356 Genus Methylosarcina Low 4 12 1056214
4 103_HOW12 0.095143 Genus Methylobacter High 1 12 1056217

In [9]:
one_facet = prepped_data[(prepped_data['oxy'] == "Low") & (prepped_data['rep'] == 1)]

In [10]:
one_facet_pivoted = one_facet.pivot(index='week', columns = 'taxonomic name', values = 'abundance sum')
one_facet_pivoted.head()


Out[10]:
taxonomic name Methylobacter Methylomonas Methylosarcina Methylovulum
week
4 0.205558 0.018689 0.013274 0.008513
5 0.296807 0.011619 0.012291 0.004229
6 0.507823 0.014163 0.009588 0.003858
7 0.461034 0.010989 0.005845 0.003571
8 0.429691 0.009909 0.005363 0.003600

In [11]:
one_facet_pivoted.plot(kind='bar', stacked=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))


Out[11]:
<matplotlib.legend.Legend at 0x1041d39b0>

In [13]:
ax = one_facet_pivoted.plot()
#plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
one_facet.plot(kind='scatter', x='week', y='abundance sum', label='', ax=ax);



In [14]:
one_facet.head()


Out[14]:
ID abundance sum taxonomic level taxonomic name oxy rep week project
20 109_LOW13 0.549671 Genus Methylobacter Low 1 13 1056229
21 109_LOW13 0.003904 Genus Methylovulum Low 1 13 1056229
22 109_LOW13 0.011319 Genus Methylomonas Low 1 13 1056229
23 109_LOW13 0.008294 Genus Methylosarcina Low 1 13 1056229
56 121_LOW14 0.485900 Genus Methylobacter Low 1 14 1056253

In [15]:
one_facet.plot.scatter


Out[15]:
<bound method FramePlotMethods.scatter of <pandas.tools.plotting.FramePlotMethods object at 0x10e8f0160>>

In [16]:
# Use Seaborn FacetGrid without facets:
p = sns.FacetGrid(one_facet.sort('week'), hue = 'taxonomic name', size=4, aspect=1.5) #, hue_kws={"marker": markers})
p.map(plt.scatter, 'week', 'abundance sum', s=80)
p.map(plt.plot, 'week', 'abundance sum')


-c:2: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[16]:
<seaborn.axisgrid.FacetGrid at 0x10d21da90>

In [17]:
fig, ax = plt.subplots(1, 1, figsize=(5, 6))
for tup, df in one_facet.groupby('taxonomic name'):
    df.plot.scatter(x='week', y='abundance sum', ax=ax)



In [18]:
# don't want this: averaging across species. 
sns.pointplot(x="week", y='abundance sum',data=one_facet)


Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x111dbac88>

In [ ]:
def facet_bar_plot(data, x, y, **kws): # groupby_col,
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot
        :param groupby: column to group on
        :param xrotation: degrees to rotate x labels by
        :param kws: kewyord arguments for plotting
        :return:
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        
        # Pass kwargs to heatmap  cmap used to be 'Blue'
        sns.barplot(x=x, y=y, data=data, **kws)
        #g.set_xticklabels(rotation=xrotation)
sns.barplot(x="day", y="total_bill", data=tips)

In [ ]:
facet_bar_plot(data=tips, x='day', y='total_bill')
# groupby_col,

In [ ]:
def plot_heatmap(plot_data, col='week', row='oxy'):
    with sns.plotting_context(font_scale=8):
        g = sns.FacetGrid(plot_data,
                          col=col,
                          row=row,
                          #size=size,
                          #aspect=aspect,
                          margin_titles=True)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.92, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax,
                        # NEED vmax = MAX ABUNDANCE or each plot will have
                        # its own color scale!
                        vmin=0, vmax=max_abundance,
                        annot=annotate,
                        groupby=cols_in_facet,
                        xrotation=xrotation)
# todo: What happens if you submit a Genus for something you also # submitted an order for??? For now assume the user is smarter than that. plot_data = aggregate_mixed_taxonomy(dataframe=dataframe, taxa_dict=taxa_dict, main_dir=main_dir) # store the maximum abundance level. We will need to tell all the # sub-heat maps to use this same maximum so they aren't each on their # own scale. max_abundance = plot_data['abundance sum'].max() # The data is seperated by these two variables. # The one not used as the facet will be used as the columns in the # subplot. if facet == 'week': cols_in_facet = 'rep' else: cols_in_facet = 'week' print('plot_data.head()') print(plot_data.head()) def pivot_so_columns_are_plotting_variable(dataframe, groupby): return dataframe.pivot(index='taxonomic name', columns=groupby, values='abundance sum') def facet_heatmap(data, groupby, xrotation, **kws): """ Used to fill the subplots with data. :param data: dataframe to plot :param groupby: column to group on :param xrotation: degrees to rotate x labels by :param kws: kewyord arguments for plotting :return: """ # pivot only supports one column for now. # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu facet_data = pivot_so_columns_are_plotting_variable( dataframe=data, groupby=groupby) # Pass kwargs to heatmap cmap used to be 'Blue' sns.heatmap(facet_data, cmap="YlGnBu", **kws) g.set_xticklabels(rotation=xrotation) # todo: add a label at the bottom like "replicate" or "week" # currently replicate is turned into facet_replicate but should just # make a label that says replicate. Week # Control plot aesthetics depending on facet option. if facet == 'week': xrotation = 0 num_rows = len(plot_data['taxonomic name'].unique()) size = 2 * 0.2*num_rows aspect = 1 space_for_cbar = 0.85 x_axis_label = 'replicate' else: xrotation = 90 # Calculate the size, aspect depending on the number of # rows per subplot num_rows = len(plot_data['taxonomic name'].unique()) size = 0.9 + 0.2*num_rows aspect = 1.2 space_for_cbar = 0.85 x_axis_label = 'week' if size_spec: size = size_spec if aspect_spec: aspect = aspect_spec with sns.plotting_context(font_scale=8): g = sns.FacetGrid(plot_data, col=facet, row='oxy', size=size, aspect=aspect, margin_titles=True) # Add axes for the colorbar. [left, bottom, width, height] cbar_ax = g.fig.add_axes([.92, .3, .02, .4], title='fraction \n of reads') g = g.map_dataframe(facet_heatmap, cbar_ax=cbar_ax, # NEED vmax = MAX ABUNDANCE or each plot will have # its own color scale! vmin=0, vmax=max_abundance, annot=annotate, groupby=cols_in_facet, xrotation=xrotation) g.set_axis_labels(x_axis_label) # add space for x label g.fig.subplots_adjust(bottom=0.2) # todo: add an x-label for each facet (I want only 1) # g.set_axis_labels(['x label', 'ylabel']) # g.fig.subplots_adjust(top=0.2) # g.fig.text(0.5, 0.1, s='armadillo') #, *args, **kwargs) # g.fig.xlabel('ardvark') # Add space so the colorbar doesn't overlap th plot. g.fig.subplots_adjust(right=space_for_cbar) # todo: still not enough room for # Order-Burkholderiales_Methylophilales_Methylococcales-- # Phylum-Bacteroidetes--rep.pdf # add a supertitle, you bet. plt.subplots_adjust(top=0.80) supertitle = taxa_dict_to_descriptive_string(taxa_dict) g.fig.suptitle(supertitle, size=16) # Also summarise # of taxa rows being grouped together. # prepare filename and save. plot_dir = elviz_utils.prepare_plot_dir(plot_dir) print("plot directory: {}".format(plot_dir)) filepath = plot_dir + supertitle filepath += "--{}".format(facet) if annotate: filepath += "--annotated" filepath += ".pdf" print(filepath) g.fig.savefig(filepath) return g