In [28]:
%pylab inline
%config InlineBackend.figure_format = 'retina'
import seaborn as sns
import pandas as pd
sns.set_style('ticks')
In [29]:
# The CSV has no header row: the first column is the k-mer string and the
# remaining eight columns are labelled with the integers 1..8 (later cells
# refer to these labels as "degree").
data = pd.read_csv(
    'Ast_g.heptamerbias.csv',
    header=None,
    names=['kmer'] + list(range(1, 9)),
)
In [32]:
# Preview the first rows of the table read from 'Ast_g.heptamerbias.csv'.
data.head()
Out[32]:
In [30]:
# Overlay one kernel-density curve per degree column of `data`.
figsize(16, 12)
# The loop starts at 2, so the column labelled 1 is not drawn.
# NOTE(review): confirm that skipping degree 1 is intentional.
for degree in range(2, 9):
    # sns.kdeplot replaces the deprecated sns.distplot(..., hist=False).
    sns.kdeplot(data[degree], label='degree={0}'.format(degree))
legend()
Out[30]:
In [33]:
# P(h): for each k-mer, its count summed over the eight degree columns,
# divided by the grand total of all counts in the table.
# The result is a Series keyed by k-mer string.
kmer_totals = data[list(range(1, 9))].sum(axis=1)
grand_total = data.set_index('kmer')[list(range(1, 9))].sum(axis=1).sum()
P_h = pd.Series(dict(zip(data.kmer, kmer_totals / grand_total)))
In [34]:
# Preview the first entries of the per-k-mer Series P_h.
P_h.head()
Out[34]:
In [35]:
# P(D): each degree column's total count as a fraction of the total over all
# eight degree columns. The result is a Series indexed by the labels 1..8.
degree_totals = data[list(range(1, 9))].sum(axis=0)
P_D = degree_totals / degree_totals.sum()
In [36]:
# Display the per-degree fractions held in P_D.
P_D
Out[36]:
In [37]:
# Reshape from wide to long: one row per (kmer, degree) pair, with the former
# column label in 'degree' and the cell value in 'heptamer_count'.
tidy = pd.melt(
    data,
    id_vars=['kmer'],
    value_vars=list(range(1, 9)),
    var_name='degree',
    value_name='heptamer_count',
)
In [38]:
# Preview the first rows of the long-format table.
tidy.head()
Out[38]:
In [39]:
# P(h|D): each row's heptamer_count divided by the total heptamer_count of all
# rows sharing its degree.
# transform('sum') returns the per-degree total broadcast back onto tidy's own
# index, so the division is aligned row-for-row. The earlier form divided a
# MultiIndexed frame by the grouped sums and depended on reset_index()
# restoring the original row order; it also summed every column in the frame,
# including the string 'kmer' column.
degree_totals_per_row = tidy.groupby('degree')['heptamer_count'].transform('sum')
tidy['P(h|D)'] = tidy['heptamer_count'] / degree_totals_per_row
In [40]:
# Preview the table again now that the 'P(h|D)' column has been added.
tidy.head()
Out[40]:
In [41]:
def p_D_h(row):
    """Return P(D|h) for one row of `tidy` using Bayes' rule.

    Reads the row's 'P(h|D)' value and looks up the notebook-level Series
    `P_D` (by the row's degree) and `P_h` (by the row's kmer).
    """
    likelihood = row['P(h|D)']
    prior = P_D[row.degree]
    evidence = P_h[row.kmer]
    return (likelihood * prior) / evidence


# Evaluate the posterior row by row and store it as a new column.
tidy['P(D|h)'] = tidy.apply(p_D_h, axis=1)
In [42]:
# Show the table ordered by k-mer and then degree, so that all rows for one
# k-mer appear together.
tidy.sort_values(['kmer', 'degree'])
Out[42]:
P(D|h) = P(h|D) * P(D) / P(h)
In [31]:
# Rank rows from highest to lowest probability.
# No cell in this notebook creates a 'heptamer_prob' column, so sorting on it
# raises KeyError on a fresh top-to-bottom run.
# NOTE(review): 'P(h|D)' is assumed to be the column formerly called
# 'heptamer_prob' -- confirm.
tidy.sort_values('P(h|D)', ascending=False)
Out[31]:
In [43]:
# Density of the per-row heptamer probabilities, with a vertical marker at
# 0.25 ** 7 (labelled 'Random heptamer prob').
# `tidy` has no 'heptamer_prob' column, so the former tidy.heptamer_prob raised
# AttributeError on a fresh run.
# NOTE(review): 'P(h|D)' is assumed to be the intended column -- confirm.
# sns.kdeplot replaces the deprecated sns.distplot(..., hist=False).
sns.kdeplot(tidy['P(h|D)'])
vlines(.25 ** 7, 0, plt.gca().get_ylim()[1], label='Random heptamer prob')
legend()
Out[43]:
In [2]:
# Load the path table. Later cells use its 't', 'kmer' and 'path_len' columns.
paths = pd.read_csv('Ast_g.paths.csv')
In [13]:
# Make column 't' the index of `paths`.
# Guarded and non-inplace so the cell can be re-run safely: once 't' has been
# moved into the index, an unconditional set_index('t') raises KeyError.
if paths.index.name != 't':
    paths = paths.set_index('t')
In [4]:
# Draw one path_len-versus-t line per k-mer.
# By this point in the notebook 't' has been moved into the index, so a plain
# group.t raises AttributeError. reset_index() makes 't' an ordinary column
# again, and is harmless if 't' is still a column.
for name, group in paths.reset_index().groupby('kmer'):
    plot(group.t, group.path_len)
In [14]:
# Preview the first rows of `paths`.
paths.head()
Out[14]:
In [27]:
# Rolling mean of path_len over a 1000-row window, drawn with a shaded band of
# +/- 2 rolling standard deviations around it.
window = 1000
rolling_len = paths.path_len.rolling(window)
ma = rolling_len.mean()
mstd = rolling_len.std()
band_low, band_high = ma - 2 * mstd, ma + 2 * mstd
plot(ma.index, ma)
fill_between(mstd.index, band_low, band_high, color='b', alpha=0.2)
Out[27]:
In [ ]: