In [ ]:
In [ ]:
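Parse the bedtools intersect output into a tidy table (TF, FBgn, gene symbol, chromosome, start, end, q value, phastCons score), then summarise the phastCons scores per group.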
In [2]:
import re
import numpy as np
import pandas as pd
import seaborn as sb
%matplotlib notebook
In [7]:
# Parse the bedtools intersect output into one record per input line.
#
# The pattern never changes, so it is compiled once instead of on every
# loop iteration. Capture groups, in order of appearance:
#   1: start            2: end
#   3: TF (an FBgn-prefixed identifier)
#   4: q value          5: chr-prefixed field preceding the "FlyBase gene" columns
#   6: value of ID=     7: value of Name=
#   8: phastcons score (second-to-last tab-separated field matched)
pattern = re.compile(r'\w*\t(\w*)\t(\w*)\t(FBgn\w*)\t(\S*)\t.*(chr\w*)\tFlyBase\sgene\t\w*\t\w*.*ID=(\w*);Name=(\w*).*\t(\S*)\t\w*')

values = []
with open('.../output/phastcons_workflow/dm6_phastcons_intersect.txt') as f:
    for line_number, line in enumerate(f, start=1):
        # Blank lines (e.g. a trailing newline at end of file) carry no record.
        if not line.strip():
            continue
        match = pattern.match(line)
        if match is None:
            # Fail loudly with the location instead of the opaque
            # "'NoneType' object has no attribute 'group'" error.
            raise ValueError(
                'Line {} does not match the expected bedtools intersect layout: {!r}'.format(
                    line_number, line
                )
            )
        start, end, TF, qval, chrom, FBgn, symbol, phastcon = match.groups()
        # Start and End are kept as the strings captured from the file;
        # only the q value and phastcons score are converted to floats.
        values.append((TF, FBgn, symbol, chrom, start, end, float(qval), float(phastcon)))

df = pd.DataFrame(values, columns=['TF','FBgn','Symbol', 'Chrom', 'Start', 'End', 'q Value','Phastcons'])
df.head()
#df.to_csv('/Users/bergeric/data/bedtoolsoutput_df.txt', sep='\t', index=False)
Out[7]:
In [8]:
# Group on every column except 'Phastcons', so that rows sharing the same
# TF / gene / interval / q value fall into a single group.
group_keys = ['TF', 'FBgn', 'Symbol', 'Chrom', 'Start', 'End', 'q Value']
grp = df.groupby(group_keys)
In [ ]:
In [9]:
# Reduce each group to the mean of its remaining (non-key) columns.
meanframe = grp.mean()
# Preview the first five aggregated rows.
meanframe.head(n=5)
Out[9]:
In [10]:
# Plot the distribution of the per-group mean Phastcons values.
# seaborn deprecated distplot in 0.11; its migration guide gives histplot with
# a density-normalised histogram and a KDE overlay (cut=3) as the equivalent.
# Older seaborn releases without histplot keep using distplot.
phastcons_means = meanframe["Phastcons"]
if hasattr(sb, "histplot"):
    ax = sb.histplot(phastcons_means, kde=True, stat="density", kde_kws=dict(cut=3))
else:
    ax = sb.distplot(phastcons_means)
# Leave the Axes as the cell's last expression so the cell still has an output.
ax
Out[10]:
In [ ]: