Parse phastCons scores from a .wigFix file
In [6]:
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
import re
%matplotlib notebook
In [2]:
# Parse the phastCons wigFix file and collect the scores that belong to chr2L.
#
# The file is read as a sequence of sections. Each section opens with a
# declaration line of the form
#     fixedStep chrom=chr<NAME> <key>=<start> <key>=<step>
# followed by one score per line. The first score of a section is assigned to
# position `start`; each following score is `step` further along.
#
# Result: `values_2L`, a list of (chrom, position, score) tuples.

# Compiled once, outside the loop. The start and step fields are matched with
# (\d+): a single-digit (\d) would silently read "step=10" as 1 and shift
# every later position in that section.
pattern = re.compile(r'^fixedStep\schrom=chr(\w*)\s\w*=(\d+)\s\w*=(\d+)')

IN_2L = False    # True while the current section belongs to chr2L
counter = 0      # position assigned to the next score of the current section
values_2L = []

with open('dm6.27way.phastCons.wigFix') as f:
    for line in f:
        if 'fixedStep' in line:
            match = pattern.match(line)
            if match is None:
                # Fail with the offending line instead of an AttributeError
                # on None further down.
                raise ValueError(
                    'Malformed fixedStep declaration: {!r}'.format(line)
                )
            chrom = match.group(1)
            start = int(match.group(2))
            step = int(match.group(3))
            IN_2L = chrom == "2L"
            if IN_2L:
                # Every section restarts the position counter at its own start.
                counter = start
        elif IN_2L:
            text = line.strip()
            if not text:
                # A blank line (e.g. at the end of the file) carries no score.
                continue
            row = (chrom, counter, float(text))
            values_2L.append(row)
            counter += step

# Quick look at the first parsed rows.
print(np.vstack(values_2L[:5]))
In [3]:
# Tabulate the parsed (chrom, position, score) tuples, one row per tuple,
# and preview the first rows.
df = pd.DataFrame(data=values_2L, columns=['chrom', 'position', 'score'])
df.head(n=5)
Out[3]:
In [8]:
# Distribution of the parsed scores.
# The figure and axes are created explicitly so the plot never lands in a
# previously active interactive figure, and the axes are labelled so the
# figure can be read on its own.
# NOTE(review): distplot is deprecated in recent seaborn releases; switch to
# sb.histplot(..., kde=True) once the installed seaborn version is confirmed.
fig, ax = plt.subplots()
sb.distplot(df["score"], ax=ax)
ax.set(
    title="phastCons scores on chr2L",
    xlabel="phastCons score",
    ylabel="Density",
);
Out[8]:
In [9]:
# (number of rows, number of columns) of the score table.
df.shape
Out[9]:
In [ ]: