import sys
In [1]:
import pandas as pd
In [2]:
import seaborn as sns
In [3]:
import matplotlib as mpl
import matplotlib.pyplot as plt
In [4]:
%matplotlib inline
In [5]:
import aggregate_mummer_results
In [6]:
# Load the tab-separated table of pairwise results; later cells read its
# 'query name' and 'ref name' columns.
full_data = pd.read_csv(
    "percent_identities.tsv",
    sep='\t',
)
In [7]:
# Display the first three rows as a quick sanity check of the loaded table.
full_data.head(3)
Out[7]:
In [8]:
# Distinct values of the 'query name' column, in order of first appearance.
organism_names = pd.unique(full_data['query name'])
In [9]:
# Display the array of distinct query names.
organism_names
Out[9]:
In [10]:
# Names to plot: every name containing "Methylotenera" first, followed by
# every name containing "Acidovorax" (substring match, original order kept
# within each group).
plot_names = [
    name
    for genus in ("Methylotenera", "Acidovorax")
    for name in organism_names
    if genus in name
]
In [11]:
# Display the selected names.
plot_names
Out[11]:
In [12]:
def only_selected_query_and_ref(name_list, path="percent_identities.tsv"):
    """Return the rows whose query AND reference names are both in *name_list*.

    The table is read fresh from *path* on every call, so the result never
    shares memory with any previously loaded frame.

    Parameters
    ----------
    name_list : list-like of str
        Exact names to keep; a row survives only if both its 'query name'
        and its 'ref name' appear in this collection.
    path : str or file-like, optional
        Tab-separated table to read (anything ``pandas.read_csv`` accepts).
        Defaults to the file the notebook has always used.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with all original columns.

    Side effects
    ------------
    Prints the number of selected rows.
    """
    all_data = pd.read_csv(path, sep='\t')
    # One combined mask is equivalent to filtering on query, then on ref.
    both_selected = (all_data['query name'].isin(name_list)
                     & all_data['ref name'].isin(name_list))
    selected = all_data[both_selected]
    print("num rows selected: {}".format(selected.shape[0]))
    return selected
In [13]:
# Keep only rows where both the query and the reference are in plot_names
# (the helper re-reads the TSV and prints the number of rows kept).
plot_data = only_selected_query_and_ref(plot_names)
In [14]:
# Heatmap of the pivoted identity table for the selected organisms.
# NOTE(review): the title implies pivot_identity_table's default value column
# is the length-weighted % identity -- confirm in aggregate_mummer_results.
#
# Create a dedicated figure at its final size up front: tight_layout() must
# run against the size that is actually saved, and a fresh figure keeps this
# plot from being drawn over whatever figure happened to be current.
fig, ax = plt.subplots(figsize=(4, 6))
sns.heatmap(aggregate_mummer_results.pivot_identity_table(plot_data), ax=ax)
ax.set_title('% identity \n (length-weighted)')
fig.tight_layout()
fig.savefig('160601_original_percent_identity_measure.pdf')
In [15]:
# Display the first rows of the filtered table.
plot_data.head()
Out[15]:
In [16]:
# Heatmap of the 'estimated % identity' column for the selected organisms.
#
# Create a dedicated figure at its final size up front: tight_layout() must
# run against the size that is actually saved, and a fresh figure keeps this
# plot from being drawn over whatever figure happened to be current.
fig, ax = plt.subplots(figsize=(4, 6))
sns.heatmap(
    aggregate_mummer_results.pivot_identity_table(
        plot_data, value_var='estimated % identity'),
    ax=ax,
)
# Title previously ended with an unbalanced ')'.
ax.set_title('(% identity)*(fraction aligned)')
fig.tight_layout()
fig.savefig('160601_original_percent_identity_tims_frac_aligned.pdf')
In [17]:
def subset_given_colnames(name_list, path="percent_identities.tsv"):
    """Return rows whose query AND reference names match any given substring.

    Despite the function name, *name_list* holds organism-name fragments,
    not column names. A name is selected when it contains at least one of
    the fragments; candidate names are taken from the 'query name' column
    of the table read from *path*.

    Parameters
    ----------
    name_list : iterable of str
        Substrings to look for in the organism names.
    path : str or file-like, optional
        Tab-separated table to read (anything ``pandas.read_csv`` accepts).
        Defaults to the file the notebook has always used.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows, safe to add columns to.

    Side effects
    ------------
    Prints the number of selected rows.
    """
    full_data = pd.read_csv(path, sep='\t')
    all_names = full_data['query name'].unique()
    # Match against the names of the table just read, not against a
    # notebook-level global that may be stale or undefined.
    plot_names = [
        name
        for org_name in name_list
        for name in all_names
        if org_name in name
    ]
    # Reduce to the desired organisms on both axes of the comparison.
    wanted = (full_data['query name'].isin(plot_names)
              & full_data['ref name'].isin(plot_names))
    selected_data = full_data[wanted].copy()
    print("num rows selected: {}".format(selected_data.shape[0]))
    return selected_data
In [18]:
def plot_metrics_as_heatmaps(metric_list, organism_list, figsize=(10, 6),
                             filename=None):
    """Draw one heatmap per metric, side by side, sharing a single colorbar.

    Parameters
    ----------
    metric_list : sequence of str
        Column names to plot, one subplot each. '% of query aligned' is
        derived here as 100 * 'frac of query aligned'; any other metric must
        already be a column of the table.
    organism_list : iterable of str
        Organism-name fragments passed to ``subset_given_colnames`` to pick
        the rows to plot.
    figsize : tuple of float, optional
        Figure size in inches, forwarded to ``plt.subplots``.
    filename : str or None, optional
        If given, the figure is saved to this path and additionally to the
        same path with its extension replaced by ``.svg``.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that was drawn, so callers can tweak or re-save it.

    Side effects
    ------------
    Prints progress/debug information and writes files when *filename* is set.
    """
    import os  # local import: only needed to derive the .svg file name

    print(len(metric_list))
    # squeeze=False keeps `axn` a 2-D array even for a single metric, so the
    # indexing below works for any number of metrics.
    fig, axn = plt.subplots(1, len(metric_list),
                            sharex=True, sharey=True,
                            figsize=figsize, squeeze=False)
    # Dedicated axes on the right-hand edge for the shared colorbar.
    cbar_ax = fig.add_axes([.91, .3, .03, .4])
    # Work on an independent copy so adding a column cannot touch (or warn
    # about) the frame it was sliced from.
    data = subset_given_colnames(name_list=organism_list).copy()
    data['% of query aligned'] = data['frac of query aligned'] * 100
    for i, metric in enumerate(metric_list):
        # prepare pivoted data
        print("i: {}, metric: {}".format(i, metric))
        subplot_ax = axn[0, i]
        print('axis: {}'.format(subplot_ax))
        subplot_data = aggregate_mummer_results.pivot_identity_table(
            data, value_var=metric)
        # Fixed 0-100 colour range so every panel is comparable; only the
        # first panel draws the colorbar, into the shared colorbar axes.
        sns.heatmap(subplot_data, ax=subplot_ax,
                    cbar=i == 0,
                    vmin=0, vmax=100,
                    cbar_ax=None if i else cbar_ax)
        subplot_ax.set_title(metric)
    # Leave the right-hand 10% of the figure free for the colorbar.
    fig.tight_layout(rect=[0, 0, .9, 1])
    print(type(fig))
    print(type(axn))
    if filename is not None:
        fig.savefig(filename)
        # str.rstrip('pdf') strips a *set of characters*, not a suffix, and
        # mangles names that do not end in ".pdf"; swap the extension instead.
        root, ext = os.path.splitext(filename)
        if ext.lower() != '.svg':
            fig.savefig(root + '.svg')
    return fig
In [19]:
# Use weight 600 for axes titles in every figure created after this point.
mpl.rcParams['axes.titleweight'] = 600
In [20]:
# Three metrics side by side for the Methylotenera mobilis / Acidovorax
# subset; the helper writes both the .pdf and a matching .svg.
p = plot_metrics_as_heatmaps(
    metric_list=['% identity', '% of query aligned', 'estimated % identity'],
    organism_list=['Methylotenera mobilis', 'Acidovorax'],
    figsize=(11, 4),
    filename='160601_ANI_metric_development.pdf',
)
In [24]:
# Two metrics for every organism: passing the full list of query names makes
# each name match (at least) itself in the substring search.
p = plot_metrics_as_heatmaps(
    metric_list=['% of query aligned', 'estimated % identity'],
    organism_list=organism_names,
    figsize=(20, 15),
    filename='160603_all_Fauzi--percent_aligned_an_percent_identity.pdf',
)
In [30]:
# Two metrics restricted to names containing 'Methylophilus methylotrophus'.
p = plot_metrics_as_heatmaps(
    metric_list=['% of query aligned', 'estimated % identity'],
    organism_list=['Methylophilus methylotrophus'],
    figsize=(7, 4.5),
    filename='160603_Fauzi_Methylophilus_methylotrophus--percent_aligned_and_percent_identity.pdf',
)
In [ ]: