In [1]:
import re
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
%matplotlib notebook
In [2]:
# Collect the names of transcription factors (TFs) whose hit count exceeds
# MIN_HITS.  Each data line is parsed as "<count> <name>" separated by
# whitespace (layout assumed from the parsing below -- TODO confirm against
# the file itself).
HITCOUNT_PATH = '/Users/bergeric/Projects/s2rnai/data/hitcount.txt'
MIN_HITS = 100  # a TF is kept only when its count is strictly greater

TFtable = []    # names of the TFs that pass the threshold
hits_list = []  # every hit count read, in file order
with open(HITCOUNT_PATH) as g:
    for line in g:
        stripped = line.strip()
        # Blank lines carry no fields (indexing them would raise IndexError);
        # the line starting with '1 ##gff-version' is a header, not a TF.
        if not stripped or stripped.startswith('1 ##gff-version'):
            continue
        fields = stripped.split()  # split once, reuse for both columns
        hits = int(fields[0])
        hits_list.append(hits)
        TF = fields[1]
        if hits > MIN_HITS:
            TFtable.append(TF)
hitdf = pd.DataFrame(TFtable, columns=['TF'])
hitdf.head()
Out[2]:
In [198]:
# Parse the motif-alignment GFF into a list of (TF, gene, q-value, p-value)
# tuples, one per non-directive line.
#
# The values are pulled out of the ';'-separated 9th column by position and
# by fixed prefix length: 5 characters are dropped from field 0 and 7 from
# fields 2 and 3 (presumably 'Name=', 'pvalue=' and 'qvalue=' -- TODO confirm
# against the file).
GFF_PATH = '/Users/bergeric/Projects/s2rnai/data/motif_alignments_dmmpmm2009_dm6.gff'

table = []
with open(GFF_PATH) as f:
    for line in f:
        # Skip '##' directive lines, and blank lines, which have no 9th
        # column and would raise IndexError below.
        if line.startswith('##') or not line.strip():
            continue
        broken = line.split('\t')
        attributes = broken[8].split(';')  # split the attribute column once
        pattern = attributes[3]
        pval = float(attributes[2][7:])
        qval = float(pattern[7:].strip())  # strip the trailing newline
        TF = attributes[0][5:]
        gene = broken[0]  # first GFF column; stored under the name 'gene'
        stuff = (TF, gene, qval, pval)
        table.append(stuff)
In [199]:
# Turn the parsed tuples into a DataFrame, one row per entry of `table`,
# and preview the first rows.
hit_columns = ['TF', 'gene', 'q-value', 'p-value']
df = pd.DataFrame(data=table, columns=hit_columns)
df.head()
Out[199]:
In [200]:
# Summary statistics of the numeric columns, computed separately per TF.
grp1 = df.groupby(by='TF')
grp1.describe()
Out[200]:
In [203]:
# Copy of the hit table indexed by TF name.  Not used by the cells visible
# here; kept because other cells may rely on it.
indexTF = df.set_index(['TF'])

# Keep only the columns needed for the per-TF distributions.
df2 = df[["TF", "q-value", "p-value"]]

# Group on a scalar key: with a one-element list key, newer pandas yields
# 1-tuples such as ('name',) when the groups are iterated, which would end
# up verbatim in the panel titles drawn from these groups.
groups = df2.groupby('TF')

# Note: this prints only the SeriesGroupBy object's repr, not its contents.
print(groups['q-value'])
In [30]:
# Scratch figure: an empty grid of axes, 8 rows by 2 columns.
fig, axes = plt.subplots(nrows=8, ncols=2)
In [192]:
# Small-multiples figure: one panel per TF showing the distribution of its
# q-values, with a dotted red line at the 0.05 cut-off.
qvalues_by_tf = groups['q-value']

# Derive the number of rows from the number of TFs so that none is silently
# dropped by zip() when there are more groups than panels.  With 31-36 TFs
# this is the same 6x6, 12x12-inch figure as before.
N_COLS = 6
n_rows = max(1, -(-qvalues_by_tf.ngroups // N_COLS))  # ceiling division
fig, axes = plt.subplots(n_rows, N_COLS, figsize=(12, 2 * n_rows),
                         sharex=True, squeeze=False)

# Distinct loop names: the loop no longer rebinds the grouped series itself.
# NOTE(review): distplot is deprecated in newer seaborn releases -- confirm
# the installed version before switching to histplot/kdeplot.
for ax, (tf_name, qvalues) in zip(axes.flat, qvalues_by_tf):
    sb.distplot(qvalues, ax=ax)
    ax.set_title(tf_name)
    ax.axvline(0.05, color='r', ls=':')

# The x axis is shared, so setting the limits on one panel sets them on all.
axes.flat[0].set_xlim(0, 1)
plt.tight_layout()
In [80]:
# Close-up of a single TF: density curve of its q-values (no histogram bars)
# with a dotted red line at the 0.05 cut-off.
#
# The TF is taken explicitly from `groups` instead of from a loop variable
# left over by another cell, so this cell no longer depends on which cell
# happened to run last.  The first group is used here; any other can be
# selected with groups.get_group(<name>).
fig, ax = plt.subplots(1, 1)
tf_name, tf_hits = next(iter(groups))
print(tf_hits)
sb.distplot(tf_hits['q-value'], hist=False, ax=ax)
ax.set_title(tf_name)
ax.axvline(0.05, color='r', ls=':')
In [55]:
# Touch the current axes of the current figure (creating them if none exist
# yet), then render all open figures.
plt.gcf().gca()
plt.show()
In [207]:
# Small-multiples figure: one panel per TF showing the distribution of its
# p-values, zoomed to the range 0-0.001, with a dotted red line at 0.05
# (which lies outside the displayed range).
pvalues_by_tf = groups['p-value']

# Derive the number of rows from the number of TFs so that none is silently
# dropped by zip() when there are more groups than panels.  With 31-36 TFs
# this is the same 6x6, 12x12-inch figure as before.
N_COLS = 6
n_rows = max(1, -(-pvalues_by_tf.ngroups // N_COLS))  # ceiling division
fig, axes = plt.subplots(n_rows, N_COLS, figsize=(12, 2 * n_rows),
                         sharex=True, squeeze=False)

# Distinct loop names: the loop no longer rebinds the grouped series itself.
# NOTE(review): distplot is deprecated in newer seaborn releases -- confirm
# the installed version before switching to histplot/kdeplot.
for ax, (tf_name, pvalues) in zip(axes.flat, pvalues_by_tf):
    sb.distplot(pvalues, ax=ax)
    ax.set_title(tf_name)
    ax.axvline(0.05, color='r', ls=':')

# The x axis is shared, so setting the limits on one panel sets them on all.
axes.flat[0].set_xlim(0, 0.001)
plt.tight_layout()
In [ ]: