In [22]:
import numpy as np
import pandas as pd
%matplotlib notebook
In [41]:
# Load the tab-delimited outfile.txt from the phastcons_workflow output folder.
# The path is handed straight to pandas (the same pattern the other
# read_table calls in this notebook use), so pandas opens and closes the file
# itself. The previous `with open(...)` wrapper had lost its body indentation
# and could not run as written.
df = pd.read_table(
    '/Users/bergeric/Projects/s2rnai/output/phastcons_workflow/outfile.txt',
    lineterminator='\n',
)
# Constant helper column: summing it per group yields the number of rows.
df['cnt'] = 1
df.head()
Out[41]:
In [25]:
# Quick interactive check of which Python type `df` currently is.
type(df)
Out[25]:
In [57]:
# Keep only the grouping keys plus the helper counter, then total the counter
# for every distinct (TF, FBgn) combination. The result is indexed by a
# (TF, FBgn) MultiIndex and has a single `cnt` column.
grp = df.loc[:, ['TF', 'FBgn', 'cnt']].groupby(by=['TF', 'FBgn'])
df1 = grp.sum()
df1
Out[57]:
In [59]:
# Count, for each TF, how many distinct (TF, FBgn) pairs appear in df1.
# Work on a copy so df1's per-pair totals stay intact.
df2 = df1.copy()
# Collapse every pair's total to 1 so the sum below counts pairs, not rows.
df2['cnt'] = 1
# Sum only `cnt`. After reset_index() FBgn is an ordinary column (presumably
# string identifiers -- confirm), and with pandas >= 2.0, where groupby sum
# defaults to numeric_only=False, an unrestricted .sum() would concatenate
# those strings per TF instead of ignoring them.
df3 = df2.reset_index().groupby('TF')[['cnt']].sum()
print(df3.shape)
df3
Out[59]:
In [52]:
# df1 is built with groupby(['TF', 'FBgn']).sum(), so a single label passed to
# .loc is matched against the first index level (TF); .shape then reports the
# size of the selection under that key.
# NOTE(review): the label looks like an FBgn identifier -- confirm it is meant
# to be looked up as a TF key. This cell's execution count (52) is lower than
# the one that builds df1 (57), so it may have run against an earlier df1.
df1.loc['FBgn0003499'].shape
Out[52]:
In [69]:
# Column labels applied to the motif alignment table loaded below.
cols = [
    'patname', 'seqname', 'start', 'stop', 'strand',
    'score', 'pval', 'qval', 'matchedseq',
]
# skiprows=1 discards the file's first line and header=None tells pandas not
# to look for a header row, so the labels come from `cols` alone.
fimodf = pd.read_table(
    '/Users/bergeric/Projects/s2rnai/data/motif_alignments_flyFactor_dm6.2L.txt',
    names=cols,
    header=None,
    skiprows=1,
)
fimodf.head()
Out[69]:
In [73]:
# Shape of the array of distinct `patname` values: a 1-tuple whose only entry
# is the number of different values in that column.
fimodf['patname'].unique().shape
Out[73]:
In [76]:
# Read the onTheFly outfile.txt and attach a constant `cnt` column of ones,
# which acts as a row counter when summed per group.
onthefly_df = pd.read_table(
    '/Users/bergeric/Projects/s2rnai/output/phastcons_workflow/onTheFly/outfile.txt',
    lineterminator='\n',
).assign(cnt=1)
onthefly_df.head()
Out[76]:
In [79]:
# Keep only the grouping keys plus the helper counter, then total the counter
# for every distinct (TF, FBgn) combination in the onTheFly table. The result
# is indexed by a (TF, FBgn) MultiIndex and has a single `cnt` column.
grp2 = onthefly_df.loc[:, ['TF', 'FBgn', 'cnt']].groupby(by=['TF', 'FBgn'])
flydf = grp2.sum()
flydf
Out[79]:
In [80]:
# Count, for each TF, how many distinct (TF, FBgn) pairs appear in flydf.
# Work on a copy so flydf's per-pair totals stay intact.
flydf2 = flydf.copy()
# Collapse every pair's total to 1 so the sum below counts pairs, not rows.
flydf2['cnt'] = 1
# Sum only `cnt`. After reset_index() FBgn is an ordinary column (presumably
# string identifiers -- confirm), and with pandas >= 2.0, where groupby sum
# defaults to numeric_only=False, an unrestricted .sum() would concatenate
# those strings per TF instead of ignoring them.
flydf3 = flydf2.reset_index().groupby('TF')[['cnt']].sum()
print(flydf3.shape)
flydf3
Out[80]:
In [81]:
# flydf is built with groupby(['TF', 'FBgn']).sum(), so a single label passed
# to .loc is matched against the first index level (TF); .shape then reports
# the size of the selection under that key.
flydf.loc['OTF0063.1'].shape
Out[81]:
In [ ]: