In [26]:
json_filename = 'x.json'
gexf_filename = 'x.gexf'

import os
#os.chdir("C:/Users/David/Documents/Dropbox")
os.chdir("C:/_Dropbox/Dropbox")

import pandas as pd
df = pd.read_csv("gh43treetable.txt", dtype=str)

In [27]:
df.columns = [['ancid', 'desc1', 'desc2', 'branchlength1', 'branchlength2']]

In [28]:
# replace spaces with underscores, then make a list of all ids with duplicates included, then remove duplicates
id_list = []
for idx, row in df.iterrows():
    for field in ['ancid', 'desc1', 'desc2']:
        df[field][idx] = (df[field][idx].strip()).replace(" ", "_")
        id_list.append(df[field][idx])
print len(id_list)
id_list = list(set(id_list))
print len(id_list)


639
427

In [29]:
print df.iloc[2]
print id_list[:10]


ancid                      217
desc1                      216
desc2             ASPNI_A2QT85
branchlength1     0.0000000000
branchlength2     0.0000000000
Name: 2, dtype: object
['Paeby1p7_018872', 'Paeby1p7_018871', 'bri_CHGT_02150', 'ABN43C_PENCH', '344', '345', 'ABN43A_PENCH', 'CORTH_1_02834', '340', '341']

In [31]:
# make dicts of IDs and positions in id_list
dict_pti = {}
dict_itp = {}
for pos in range(len(id_list)):
    dict_pti[pos] = id_list[pos]
    dict_itp[id_list[pos]] = pos

In [36]:
#make link list
link_list = []
for idx, row in df.iterrows():
    for descnum in ['desc1', 'desc2']:
        templist = []
        templist.append(df.ancid[idx])
        templist.append(df[descnum][idx])
        link_list.append(templist)
print link_list[:10]


[['215', 'ABN43A_ASPNG'], ['215', 'ANIG203143G'], ['216', '215'], ['216', 'ASPNI_P42256'], ['217', '216'], ['217', 'ASPNI_A2QT85'], ['218', 'PENCH_Q5H7M8'], ['218', 'ABN43A_PENCH'], ['219', 'Paeby1p7_018872'], ['219', '218']]

In [52]:
# write gexf
with open(json_filename, "w") as f:
    f.write('<gexf xmlns="http://www.gexf.net/1.2draft" version="1.2">\n    <meta lastmodifieddate="2009-03-20">\n        <creator>Gexf.net</creator>\n')
with open(json_filename, "a") as f:
    f.write('        <description>')
    f.write(
    for pos in range(len(id_list)):
        f.write('    {"name": "' + dict_pti[pos] + '"}')
        if pos < len(id_list) - 1:
            f.write(',\n')
        else:
            f.write('\n')
    f.write('  ],\n  "links": [\n')
    for pos in range(len(link_list)):
        f.write('    {"source": ' + str(dict_itp[link_list[pos][0]]) + ', "target": ' + str(dict_itp[link_list[pos][1]]) + '}')
        if pos < len(link_list) - 1:
            f.write(',\n')
        else:
            f.write('\n')
    f.write('  ]\n}')

In [ ]:
# write gexf
with open(gexf_filename, "w") as f:
    f.write(