In [ ]:
from collections import defaultdict
import os
from os import path
In [ ]:
def fname2id(fname):
    """Return the ID for a FASTQ file name or path.

    The directory part is dropped, then the first matching suffix out of
    ``_il.fastq.gz`` and ``.fastq.gz`` is removed. A name with neither
    suffix is returned as its bare base name.
    """
    base = path.basename(fname)
    # The longer suffix is tried first: it also ends in ".fastq.gz", so
    # trying the short one first would leave a trailing "_il" on the ID.
    for suffix in ("_il.fastq.gz", ".fastq.gz"):
        if base.endswith(suffix):
            return base[:len(base) - len(suffix)]
    return base
In [ ]:
# Read the distance table: one record per line, fields separated by tabs.
# The with-block closes the file as soon as the table is in memory.
with open("data/2841.mashdist") as fh:
    stripped_lines = (line.strip() for line in fh)
    # Blank lines are skipped; they would otherwise become [''] records that
    # have no second or third column to read.
    table = [line.split('\t') for line in stripped_lines if line]
In [ ]:
# Nested mapping dists[id1][id2] -> distance. defaultdict(dict) creates the
# inner dict automatically the first time a given id1 is used.
dists = defaultdict(dict)
In [ ]:
# Each record carries two file names and a distance in its first three
# columns; any further columns are ignored.
for record in table:
    id1 = fname2id(record[0])
    id2 = fname2id(record[1])
    # The record list has its own name so it is not rebound to the parsed
    # float halfway through the loop body.
    dist = float(record[2])
    dists[id1][id2] = dist
In [ ]:
# Write the distances as a tab-separated matrix: a header line of column IDs
# preceded by an empty corner cell, then one line per row ID in sorted order.
#
# Columns are the IDs that occur as a second key anywhere in dists, and each
# row is looked up by column ID, so every value lands under its own header
# even when some pairs are absent. An absent pair is written as an empty
# field. For a complete square table the output is unchanged.
column_ids = sorted({id2 for row in dists.values() for id2 in row})
ids = [''] + column_ids
# The with-block closes the output file even if a write fails.
with open("2841.dist", 'w') as ofile:
    print(*ids, sep='\t', file=ofile)
    for id1, row in sorted(dists.items()):
        rowdists = [row.get(id2, '') for id2 in column_ids]
        print(id1, *rowdists, sep='\t', file=ofile)