In [26]:
# Testing the vienna rnafold package
import os
import sys
import re
sys.path.append('/home/jaggu/research/downloads/vienna/viennaRNA/lib/python2.7/site-packages')
import RNA
# Smoke test: fold a short sequence and show both the raw dot-bracket string
# and its collapsed stem/loop (SL) form.
dotNot = RNA.fold('CCCGGCGTGGG')[0]
print(dotNot)

# Every run of "(" or ")" becomes one "S" (stem) and every run of "." one
# "L" (loop). Raw strings keep the regex escapes literal.
_rep = re.sub(r"\(+", "S", dotNot)
_rep = re.sub(r"\)+", "S", _rep)
slNotation = re.sub(r"\.+", "L", _rep)
# Show the converted notation too; previously it was computed but never displayed.
print(slNotation)
Out[26]:
In [1]:
import os
import sys
import cPickle as pickle
import time
def loadPkl(fname, pklDir='/home/jaggu/research/projectFiles/operons/pklFiles'):
    """Load and return the object pickled in ``pklDir/fname``.

    Parameters
    ----------
    fname : str
        File name of the pickle, relative to ``pklDir``.
    pklDir : str, optional
        Directory holding the pickle files. Defaults to the project's
        pickle directory, so existing ``loadPkl(fname)`` calls are unchanged.

    Returns
    -------
    The unpickled object.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened.
    """
    f = os.path.join(pklDir, fname)
    # NOTE: pickle.load can execute arbitrary code; only use this on pickle
    # files produced by this project, never on untrusted input.
    # Binary mode plus a context manager: the handle is always closed and the
    # bytes are read untranslated.
    with open(f, 'rb') as fh:
        db = pickle.load(fh)
    return db
def savePkl(db, pklFname, pklDir='/home/jaggu/research/projectFiles/operons/pklFiles'):
    """Pickle ``db`` to ``pklDir/pklFname``.

    Parameters
    ----------
    db : object
        Any picklable object.
    pklFname : str
        File name to write, relative to ``pklDir``.
    pklDir : str, optional
        Directory to write into. Defaults to the project's pickle directory,
        so existing ``savePkl(db, pklFname)`` calls are unchanged.

    Returns
    -------
    None

    Raises
    ------
    IOError / OSError
        If the file cannot be opened for writing.
    """
    f = os.path.join(pklDir, pklFname)
    # The context manager guarantees the data is flushed and the handle closed
    # even if pickling fails part-way; binary mode writes the bytes untranslated.
    with open(f, 'wb') as fh:
        pickle.dump(db, fh)
    return
In [ ]:
# Load the pickled dictionaries that the analysis cells below rely on.
org_lTagPairIGSeq_dict = loadPkl(fname='org_locusTagPairInterGeneSeq.dict.pkl')
locus_cog_dict = loadPkl(fname='locus_cog.dict.pkl')
# Timestamped progress message, emitted once both pickles are in memory.
print(" ".join(["Org_lTagPairIGSeq_dict loaded", time.ctime()]))
In [6]:
# Fold each intergenic (IG) sequence as RNA with RNAfold from the Vienna package and read out an SL notation
# (S = stem, L = loop). Every COG pair then gets a list of these secondary structures. The COG pairs are then
# parsed to get the frequency of each secondary structure.
sys.path.append('/home/jaggu/research/downloads/vienna/viennaRNA/lib/python2.7/site-packages')
import RNA
import re
def _dotBracketToSL(dotNot):
    """Collapse a dot-bracket string into stem/loop (SL) notation.

    Each maximal run of "(" and each maximal run of ")" becomes a single "S";
    each maximal run of "." becomes a single "L". For example "((...))"
    becomes "SLS".
    """
    stemsCollapsed = re.sub(r"\(+|\)+", "S", dotNot)
    return re.sub(r"\.+", "L", stemsCollapsed)


def getSStr(dnaSeq):
    """Return the stem/loop (SL) notation of the folded sequence.

    Parameters
    ----------
    dnaSeq : str
        Sequence passed unchanged to ``RNA.fold``.

    Returns
    -------
    str
        Element 0 of ``RNA.fold(dnaSeq)`` (the dot-bracket structure) with
        runs of brackets collapsed to "S" and runs of dots collapsed to "L".
    """
    dotNot = RNA.fold(dnaSeq)[0]
    return _dotBracketToSL(dotNot)
# Preview only: fold the first locus-tag pair of the first organism and stop.
# Plain `break`s end the preview; the earlier sys.exit(1) raised SystemExit
# inside the kernel, which aborts a "Run All" of the notebook.
for org, lTagIGSeq_list in org_lTagPairIGSeq_dict.items():
    print("%s %s" % (org, len(lTagIGSeq_list)))
    for lTag1, lTag2, dnaSeq in lTagIGSeq_list:
        print("%s %s" % (lTag1, lTag2))
        secStr = getSStr(dnaSeq)
        print("%s %s" % (dnaSeq, secStr))
        break  # one pair is enough for the preview
    break  # only the first organism is inspected
In [ ]: