This would probably make more sense as a script, but I have not bothered to convert it. It writes out a pickled file for use elsewhere.
In [19]:
import pandas as pd
import re
import glob
import cPickle as cpk
from IPython.core.debugger import Tracer #used this to step into the function and debug it, also need line with Tracer()()
Now go in and read one BRITE file (originally written for the compounds)
In [101]:
# Scratch test: pull the map number out of a single BRITE line.
# `line` is not defined by any earlier cell, so use a representative
# C-level row here (sample text only -- swap in a real line from the file).
line = 'C    01100  Metabolic pathways'
textC = re.compile(r'\d+')  # raw string so \d reaches the regex engine untouched
mc = textC.search(line)
mc.group(0)
#so, now I have the map number...how do I get the rest of the line?
Out[101]:
In [116]:
# Scratch test: capture the map number (group 1) and the rest of the line (group 2).
# `line` is not defined by any earlier cell, so use a representative
# C-level row here (sample text only -- swap in a real line from the file).
line = 'C    01100  Metabolic pathways'
textC = re.compile(r'(\d+)\s*(.*)$')
mC = textC.search(line)
# print(...) with one argument behaves the same on Python 2 and Python 3
print(mC.group(1))
print(mC.group(2))
In [ ]:
In [134]:
def ReadBRITEfile(briteFile):
    """Read one BRITE hierarchy file into a DataFrame of its A/B/C levels.

    Lines starting with '#' are skipped. Every other line is tested, in this
    order, against the A pattern (``A<b>...</b>``), the B pattern (a line that
    starts with ``B``) and the C pattern (the first run of digits in the line
    followed by the rest of the line). Lines matching none of them are
    dropped. The parser relies on the rows being in order: an A heading, then
    its B subheadings, then their C entries, so each row carries the most
    recently seen A and B text.

    Parameters
    ----------
    briteFile : str
        Path of the text file to parse.

    Returns
    -------
    pandas.DataFrame
        Columns ``['map', 'A', 'B', 'C', 'wholeThing']``, indexed by the
        zero-based line number within the file. ``wholeThing`` holds the raw
        line (including its line ending) as a cross-check. ``map`` and ``C``
        are only filled on C rows, ``B`` on B and C rows; everything else is
        NaN. A row seen before any A (or B) heading gets NaN for that level.
    """
    columns = ['map', 'A', 'B', 'C', 'wholeThing']
    # set up the expressions to match each level in the BRITE hierarchy
    textA = re.compile(r'(^A<b>)(.+)(</b>)\s*(.*)$')
    textB = re.compile(r'(^B)\s*(.*)$')
    textC = re.compile(r'(\d+)\s*(.*)$')

    # Most recent A / B headings; NaN until the first one is seen so that an
    # out-of-order row records a missing value rather than an empty list.
    missing = float('nan')
    setA = missing
    setB = missing

    # Collect plain records and build the frame once at the end; assigning
    # cell by cell with .loc re-allocates the frame on every new row.
    lineNumbers = []
    rows = []
    with open(briteFile) as f:
        for idx, line in enumerate(f):
            # skip over the comments (value comparison, not identity)
            if line.startswith('#'):
                continue
            row = {'wholeThing': line}  # kept as a double check for now
            mA = textA.search(line)
            if mA:
                # setB is left as-is on a new A heading (same as before)
                setA = mA.group(2)
            else:
                mB = textB.search(line)
                if mB:
                    setB = mB.group(2)
                    row['B'] = setB
                else:
                    mC = textC.search(line)
                    if not mC:
                        continue  # not an A, B or C row
                    row['map'] = mC.group(1)
                    row['B'] = setB
                    row['C'] = mC.group(2)
            row['A'] = setA
            lineNumbers.append(idx)
            rows.append(row)

    forBrite = pd.DataFrame(rows, index=lineNumbers, columns=columns)
    # keep every column as object dtype so an all-missing level does not
    # silently become a float column
    return forBrite.astype(object)
In [153]:
# Parse every '*keg.txt' file in the working directory and stack the results.
# The earlier loop re-assigned allBRITE on each pass, so only the last file
# that glob happened to return survived.
D = sorted(glob.glob('*keg.txt'))  # sorted so the row order is reproducible
if not D:
    # fail here rather than leaving a non-DataFrame behind for later cells
    raise IOError("no files matching '*keg.txt' in the working directory")
# Row labels are per-file line numbers, so they repeat when several files match.
allBRITE = pd.concat([ReadBRITEfile(nof) for nof in D])
In [138]:
# sanity check: which kind of object did the parsing cell leave in allBRITE?
type(allBRITE)
Out[138]:
In [146]:
# every row whose map number is the string '01100'
allBRITE[allBRITE['map'] == '01100']
Out[146]:
In [147]:
# just the C-level text for map '01100' (row mask and column in one .loc call)
allBRITE.loc[allBRITE['map'] == '01100', 'C']
Out[147]:
In [148]:
# check what kind of object the single-column lookup hands back
type(allBRITE.loc[allBRITE['map'] == '01100', 'C'])
Out[148]:
In [ ]:
In [137]:
# display the parsed table
# NOTE(review): this renders the whole frame; allBRITE.head() would give a shorter preview
allBRITE
Out[137]:
In [ ]:
In [151]:
#now...save all that so I don't have to do this every time
# the with-block closes (and flushes) the file even if dump() raises
with open('BRITE_pathwaysOnly.pickle', 'wb') as pickleFile:
    cpk.dump(allBRITE, pickleFile)
In [152]:
# Read the pickle back as a round-trip check; the with-block closes the file.
# Only unpickle files written by this notebook -- unpickling can run arbitrary code.
with open('BRITE_pathwaysOnly.pickle', 'rb') as pickleFile:
    reloadedBRITE = cpk.load(pickleFile)
reloadedBRITE
Out[152]:
In [ ]: