In [ ]:
from collections import defaultdict
import os
from os import path

In [ ]:
def fname2id(fname):
    """Turn a file path into a bare identifier.

    The directory part is dropped, then at most one known trailing suffix
    (``_il.fastq.gz`` or ``.fastq.gz``) is removed from the basename.
    Names ending in neither suffix are returned as their basename unchanged.
    """
    base = path.basename(fname)
    # The longer suffix is tried first: stripping only ".fastq.gz" from an
    # "_il.fastq.gz" name would leave a stray "_il" on the identifier.
    for suffix in ("_il.fastq.gz", ".fastq.gz"):
        if base.endswith(suffix):
            return base[:len(base) - len(suffix)]
    return base

In [ ]:
# Load the tab-separated distance table (presumably `mash dist` output, going
# by the file name -- confirm). Each non-blank line becomes a list of fields.
# The context manager closes the handle even if reading fails, and blank
# lines are skipped so a trailing newline cannot produce a one-field row.
with open("data/2841.mashdist") as fh:
    table = [line.strip().split('\t')
             for line in fh
             if line.strip()]

In [ ]:
# Nested mapping filled in below as dists[id1][id2] = distance; looking up a
# missing outer key creates an empty inner dict automatically.
dists = defaultdict(dict)

In [ ]:
# Populate the nested mapping from the parsed rows. Only the first three
# fields of a row are used: two file names (reduced to ids) and a distance.
for dist in table:
    id1, id2 = fname2id(dist[0]), fname2id(dist[1])
    dist = float(dist[2])  # rebinds the row variable to its numeric distance
    dists[id1].update({id2: dist})

In [ ]:
# Write the distances as a tab-separated matrix: a header row of column ids
# (with an empty leading cell), then one row per first id in sorted order.
#
# Columns are the sorted union of every second id seen, and each cell is
# looked up by its column id. Sorting each row's own keys independently (as
# before) only lines up with the header when every row holds exactly the same
# ids as the header; any missing or extra pair would shift values under the
# wrong column. For a complete all-vs-all table the output is unchanged.
# Pairs that are absent are written as empty fields.
ids = [''] + sorted({id2 for row in dists.values() for id2 in row})
# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through.
with open("2841.dist", 'w') as ofile:
    print(*ids, sep='\t', file=ofile)
    for id1, row in sorted(dists.items()):
        rowdists = [row.get(id2, '') for id2 in ids[1:]]
        print(id1, *rowdists, sep='\t', file=ofile)