In [1]:
# --- imports ---------------------------------------------------------------
import os
import pandas as pd

# --- configuration ---------------------------------------------------------
# Output file names. The JSON export part of this notebook was accidentally
# erased and still has to be re-integrated; only the GEXF name is used below.
json_filename = 'x.json'
gexf_filename = 'gh43.gexf'

# --- load data -------------------------------------------------------------
# All relative paths (input table and GEXF output) resolve against this folder.
# Alternate location on another machine: "C:/Users/David/Documents/Dropbox"
os.chdir("C:/_Dropbox/Dropbox")

# dtype=str keeps every field as text so ids are never parsed as numbers.
# NOTE(review): with default header handling the first line of the file is
# consumed as column names -- confirm the file really has a header row.
df = pd.read_csv("gh43treetable.txt", dtype=str)
In [2]:
# Name the five columns of the tree table: an ancestor id, its two descendant
# ids, and one branch-length column per descendant.
# (presumably this matches the column order of the input file -- TODO confirm)
#
# The names are given as a FLAT list. A nested list ([[...]]) is treated by
# pandas as a list of level arrays and produces MultiIndex columns, so that
# df['ancid'] may no longer return a plain Series.
df.columns = ['ancid', 'desc1', 'desc2', 'branchlength1', 'branchlength2']
In [3]:
# Normalise the three id columns (trim surrounding whitespace, replace spaces
# with underscores), then make a list of all ids with duplicates included,
# then remove the duplicates.
id_fields = ['ancid', 'desc1', 'desc2']
for field in id_fields:
    # Assign the whole column at once. The former per-cell form
    # df[field][idx] = ... is chained indexing, which may write to a temporary
    # copy and leave df itself unchanged.
    # NOTE: missing cells (NaN) pass through unchanged here, whereas calling
    # .strip() on them one by one raised an AttributeError.
    df[field] = df[field].str.strip().str.replace(" ", "_")

# Row-major flattening visits the ids row by row in the order
# ancid, desc1, desc2 -- the same order in which they used to be appended.
id_list = df[id_fields].values.ravel().tolist()
print(len(id_list))   # count with duplicates

id_list = list(set(id_list))
print(len(id_list))   # count of unique ids
In [4]:
# Sanity check: show the third row of the table and the first ten unique ids.
# print(...) with a single argument gives the same output on Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print(df.iloc[2])
print(id_list[:10])
In [5]:
# Build two lookup tables between ids and their positions in id_list.

# dict_pti: position in id_list -> id
dict_pti = dict(enumerate(id_list))

# dict_itp: id -> position in id_list
dict_itp = {node_id: position for position, node_id in enumerate(id_list)}
In [6]:
# Make the link list: for every row of the table emit two [ancestor, descendant]
# pairs, first the one for desc1 and then the one for desc2.
link_list = []
# Walk the three columns in parallel instead of iterrows() plus a label lookup
# per cell; the row object produced by iterrows() was never used.
for ancestor, first_desc, second_desc in zip(df['ancid'], df['desc1'], df['desc2']):
    link_list.append([ancestor, first_desc])
    link_list.append([ancestor, second_desc])

# Preview the first ten links (call form works on Python 2 and Python 3).
print(link_list[:10])
In [8]:
# Write the graph to gexf_filename as GEXF 1.2: a meta header, one <node> per
# entry of id_list (id = position, label = id string) and one directed <edge>
# per entry of link_list (source/target = positions of the two ids).
from xml.sax.saxutils import escape  # local import: only this cell needs it


def _xml_attr(value):
    """Return str(value) escaped for use inside a double-quoted XML attribute."""
    # escape() handles &, < and >; the extra entity covers the quote character
    # that would otherwise terminate the attribute early.
    return escape(str(value), {'"': '&quot;'})


# A single "w" handle replaces the former open-for-write followed by
# open-for-append; the resulting file content is the same.
with open(gexf_filename, "w") as f:
    # Header and metadata.
    f.write('<gexf xmlns="http://www.gexf.net/1.2draft" version="1.2">\n <meta lastmodifieddate="2009-03-20">\n <creator>Gexf.net</creator>\n')
    f.write(' <description>')
    f.write(escape(gexf_filename))
    f.write('</description>\n </meta>\n <graph mode="static" defaultedgetype="directed">\n')

    # Nodes: labels are escaped so ids containing &, < or " stay valid XML.
    f.write(' <nodes>\n')
    for pos in range(len(id_list)):
        f.write(' <node id="')
        f.write(str(pos))
        f.write('" label="')
        f.write(_xml_attr(dict_pti[pos]))
        f.write('" />\n')
    f.write(' </nodes>\n')

    # Edges: each link is [ancestor id, descendant id]; both are translated to
    # their node positions through dict_itp.
    f.write(' <edges>\n')
    for edge_id, (source_id, target_id) in enumerate(link_list):
        f.write(' <edge id="')
        f.write(str(edge_id))
        f.write('" source="')
        f.write(str(dict_itp[source_id]))
        f.write('" target="')
        f.write(str(dict_itp[target_id]))
        f.write('" />\n')
    f.write(' </edges>\n')

    f.write(' </graph>\n')
    f.write('</gexf>\n')
In [ ]: