Parse phastCons scores from a .wigFix file
In [6]:
    
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
import re
%matplotlib notebook
    
In [2]:
    
# Read the phastCons wigFix file and collect one (chrom, position, score)
# tuple for every score line that belongs to chromosome 2L.
#
# The file is a series of "fixedStep" header lines, each followed by one
# score per line. A header names the chromosome, the position of the first
# score and the distance between consecutive scores.

# Header shape: "fixedStep chrom=chr<name> <key>=<int> <key>=<int>".
# Compiled once, outside the loop. Both integers use "\d+" so that
# multi-digit values (e.g. step=10) are captured in full and an empty
# value is rejected instead of reaching int('').
pattern = re.compile(r'^fixedStep\schrom=chr(\w*)\s\w*=(\d+)\s\w*=(\d+)')

IN_2L = False   # True while the current fixedStep section is on chr2L
counter = 0     # position of the next score line in the current section
values_2L = []
with open('dm6.27way.phastCons.wigFix') as f:
    for line in f:
        if line.startswith('fixedStep'):
            match = pattern.match(line)
            if match is None:
                # Fail with the offending line rather than an opaque
                # AttributeError on None.
                raise ValueError(
                    'Malformed fixedStep header: {!r}'.format(line))

            chrom = match.group(1)
            start = int(match.group(2))
            step = int(match.group(3))

            IN_2L = chrom == "2L"
            if IN_2L:
                counter = start
        elif IN_2L:
            text = line.strip()
            if not text:
                # Blank line (e.g. at end of file): carries no score, so it
                # must not advance the position counter.
                continue
            values_2L.append((chrom, counter, float(text)))
            counter += step

# Quick sanity check of the first few parsed rows.
print(np.vstack(values_2L[:5]))
    
    
In [3]:
    
# Assemble the parsed (chrom, position, score) tuples into a table and
# preview its first rows.
column_names = ['chrom', 'position', 'score']
df = pd.DataFrame(data=values_2L, columns=column_names)
df.head(n=5)
    
    Out[3]:
In [8]:
    
# Distribution of the parsed chr2L scores.
# The Axes is created explicitly so the plot does not depend on pyplot's
# "current figure" state (with the interactive notebook backend an earlier
# figure may still be the active one), and it is labelled so the figure
# can be read on its own.
# NOTE(review): distplot is deprecated in newer seaborn releases; consider
# histplot/displot once the environment's seaborn version is confirmed.
fig, ax = plt.subplots()
sb.distplot(df["score"], ax=ax)
ax.set(title="chr2L phastCons score distribution",
       xlabel="phastCons score",
       ylabel="Density");
    
    
    
    Out[8]:
In [9]:
    
# Report the table's dimensions as (number of rows, number of columns).
n_rows, n_cols = df.shape
(n_rows, n_cols)
    
    Out[9]:
In [ ]: