In [21]:
# Input: text report that the parsing cell reads line by line.
# NOTE(review): absolute, user-specific path -- the notebook only runs as-is
# on a machine that has this exact file; consider a path relative to the repo.
fname = "/home/tiago/OTU_GG.txt"
# Output: tab-separated "query<TAB>taxon" table written by the last cell.
wname = "../analysis_data/old_taxa.txt"
In [16]:
# Parse the report at `fname` into `query_taxa`: {query id -> taxon string}.
#
# Recognised lines:
#   * 'Query= <id> ...'  starts a new query; <id> is the second
#                        space-separated token on the line.
#   * '>...'             starts a hit header, which may wrap over several
#                        lines and ends at the first empty line (or at end
#                        of file).
# From the joined header only the space-separated tokens containing '__' are
# kept (presumably rank-prefixed taxonomy labels such as 'k__...' -- confirm
# against the input file) and joined back with single spaces.
#
# NOTE(review): every hit header overwrites the previous entry for the same
# query, so the LAST hit listed for a query is the one that is kept. Confirm
# this is intended (e.g. the report holds one hit per query).
query_taxa = {}
# Reset explicitly: without this, a hit header seen before any 'Query=' line
# would raise NameError on a fresh kernel, or silently reuse a stale `query`
# left behind by an earlier run of another cell.
query = None
with open(fname, 'rt') as f:
    l = f.readline()
    while l != '':  # readline() returns '' only at end of file
        l = l.rstrip()
        if l.startswith('Query='):
            query = l.split(' ')[1]
            l = f.readline()
        elif l.startswith('>'):
            if query is None:
                raise ValueError(
                    "hit header found before any 'Query=' line in %s: %r"
                    % (fname, l))
            line = l[1:]
            l = f.readline()
            # Collect the wrapped continuation lines of the header. Stopping
            # on '' as well as '\n' is essential: at end of file readline()
            # keeps returning '', which never equals '\n', so a file that
            # ends inside a header would otherwise loop forever.
            while l != '' and l != '\n':
                line += l.rstrip()
                l = f.readline()
            taxon = ' '.join(tok for tok in line.split(' ') if '__' in tok)
            query_taxa[query] = taxon
        else:
            l = f.readline()
In [23]:
# Write one "query<TAB>taxon" line per entry of `query_taxa` to `wname`,
# replacing any existing file at that path.
with open(wname, 'wt') as w:
    w.writelines(
        '%s\t%s\n' % (query, taxon)
        for query, taxon in query_taxa.items()
    )