In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%run ../../code/colors-and-styles/empcolors.py
In [2]:
# seaborn plot settings
sns.set_context('talk')
sns.set_style('white')
In [3]:
# input file paths (.csv removed)
paths_phylum_empo2 = [
'../../data/nestedness/nest_phylum_Animal',
'../../data/nestedness/nest_phylum_Plant',
'../../data/nestedness/nest_phylum_Saline',
'../../data/nestedness/nest_phylum_Non-saline']
paths_phylum_all = ['../../data/nestedness/nest_phylum_allsamples']
In [4]:
def plot_nestedness_big(path, taxlevel, xmax, ymax, legendloc, legendcol):
df = pd.read_csv('%s.csv' % path)
# df = df[df['OBSERVATION_ID'] != 'unclassified'] # if we want to remove unclassified phylum
empolevel = path.split('/')[-1].split('_')[-1]
if xmax == 'auto':
xmax = df.SAMPLE_RANK.max()
if ymax == 'auto':
ymax = df.OBSERVATION_RANK.max()
fig = plt.figure(figsize=(500/30, 80/12.7), facecolor=None) # xmax/30 ymax/12.7
for empo3 in np.sort(df.empo_3.unique()):
plt.scatter(df[df.empo_3 == empo3].SAMPLE_RANK, df[df.empo_3 == empo3].OBSERVATION_RANK, marker='|',
linewidths=2, label=empo3, color=get_empo_cat_color(empo3))
plt.xlabel('%s samples (sorted by richness)' % empolevel, fontsize=24)
plt.ylabel('%s (sorted by prevalence)' % taxlevel, fontsize=24)
plt.tick_params(axis='both', which='major', labelsize=20)
plt.axis([0, xmax+1, 0, ymax+0.8])
#plt.legend(loc=legendloc, ncol=legendcol, markerscale=4, handletextpad=0, fontsize=18) # bbox_to_anchor=(0.5, 1+0.08*80/ymax),
#plt.tight_layout()
fig.patch.set_alpha(0.0)
plt.savefig('%s.pdf' % path)
return df, empolevel
In [5]:
def plot_nestedness(path, taxlevel, xmax, ymax, legendloc, legendcol):
df = pd.read_csv('%s.csv' % path)
# df = df[df['OBSERVATION_ID'] != 'unclassified'] # if we want to remove unclassified phylum
empolevel = path.split('/')[-1].split('_')[-1]
if xmax == 'auto':
xmax = df.SAMPLE_RANK.max()
if ymax == 'auto':
ymax = df.OBSERVATION_RANK.max()
fig = plt.figure(figsize=(400/30, 80/12.7), facecolor=None) # xmax/30 ymax/12.7
for empo3 in np.sort(df.empo_3.unique()):
plt.scatter(df[df.empo_3 == empo3].SAMPLE_RANK, df[df.empo_3 == empo3].OBSERVATION_RANK, marker='|',
linewidths=2, label=empo3, color=get_empo_cat_color(empo3))
plt.xlabel('%s samples (sorted by richness)' % empolevel, fontsize=24)
plt.ylabel('%s (sorted by prevalence)' % taxlevel, fontsize=24)
plt.tick_params(axis='both', which='major', labelsize=20)
plt.axis([0, xmax+1, 0, ymax+0.8])
#plt.legend(loc=legendloc, ncol=legendcol, markerscale=4, handletextpad=0, fontsize=18) # bbox_to_anchor=(0.5, 1+0.08*80/ymax),
#plt.tight_layout()
fig.patch.set_alpha(0.0)
plt.savefig('%s.pdf' % path)
return df, empolevel
In [6]:
def top_taxa(df):
return df[['OBSERVATION_RANK', 'OBSERVATION_ID']].sort_values(
'OBSERVATION_RANK', ascending=False).drop_duplicates().reset_index(drop=True)
In [7]:
panels_dict = {}
for path in paths_phylum_all:
df, empolevel = plot_nestedness_big(path, 'Phyla', 'auto', 'auto', 3, 4)
panels_dict[empolevel] = df
In [8]:
for path in paths_phylum_empo2:
df, empolevel = plot_nestedness(path, 'Phyla', 'auto', 'auto', 3, 1)
panels_dict[empolevel] = df
In [9]:
for key in sorted(panels_dict):
print('\n' + key)
print(panels_dict[key]['OBSERVATION_ID'].value_counts().head(8))
In [ ]: