Parse `bedtools intersect` output into a tidy pandas table, then summarize the mean phastCons score per feature.
In [52]:
import re
import numpy as np
import pandas as pd
import seaborn as sb
%matplotlib notebook
In [47]:
# Build one record per line of the bedtools intersect output and load the
# records into a DataFrame.

# Input file, named once here so it is easy to find and change.
INTERSECT_PATH = '/Users/bergeric/data/dm6_phastcons_intersect.txt'

# Compiled once, outside the loop: the pattern does not depend on the line.
# Capture groups, in order:
#   1: an "FBgn..." token followed by a tab   -> TF
#   2: the next non-whitespace field          -> Score
#   3: a "chr..." token                       -> Chrom
#   4, 5: the two fields after "FlyBase gene" -> Start, End
#   6: the value of "ID="                     -> FBgn
#   7: the value of "Name="                   -> Symbol
#   8: the last tab-separated field           -> Phastcons
pattern = re.compile(r'.*(FBgn\w*)\t(\S*)\t.*(chr\w*)\tFlyBase\sgene\t(\w*)\t(\w*).*ID=(\w*);Name=(\w*).*\t(\S*)')

values = []
with open(INTERSECT_PATH) as f:
    for line_number, line in enumerate(f, start=1):
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        if not line.strip():
            continue

        match = pattern.match(line)
        if match is None:
            # Fail loudly with the offending line instead of the opaque
            # AttributeError that `None.group(...)` would raise.
            raise ValueError(
                'Line {} of {} does not match the expected intersect format: {!r}'.format(
                    line_number, INTERSECT_PATH, line
                )
            )

        TF, score, chrom, start, end, FBgn, symbol, phastcon = match.groups()
        # Start/End are kept as the matched strings; only the two scores are
        # converted to float.
        values.append((TF, FBgn, symbol, chrom, start, end, float(score), float(phastcon)))

df = pd.DataFrame(values, columns=['TF','FBgn','Symbol', 'Chrom', 'Start', 'End', 'Score','Phastcons'])
df.head()
# Uncomment to persist the table as a tab-separated file:
#df.to_csv('/Users/bergeric/data/bedtoolsoutput_df.txt', sep='\t', index=False)
Out[47]:
In [45]:
# Group rows that share every listed key column; Phastcons is deliberately
# left out of the keys so it can be aggregated per group.
group_keys = ['TF', 'FBgn', 'Symbol', 'Chrom', 'Start', 'End', 'Score']
grp = df.groupby(group_keys)
In [ ]:
In [49]:
# Average the non-key column(s) within each group.
meanframe = grp.mean()
# Preview the first five rows of the aggregated table.
meanframe.head(5)
Out[49]:
In [53]:
# Plot the distribution of the per-group mean Phastcons values.
# NOTE(review): `distplot` is deprecated in seaborn >= 0.11 in favour of
# `histplot` / `displot` -- confirm the installed version before upgrading.
phastcons_means = meanframe["Phastcons"]
sb.distplot(phastcons_means)
Out[53]:
In [ ]: