In [36]:
#Script to reformat methane data files for two-level Sankey
#Output: JSON file (nodes and links) for the Sankey diagram
#Created: 30.05.2016
#Last modified:
In [1]:
import numpy as np
import pandas as pd
import collections
import os
import xlrd
In [2]:
# Region name -> region id (ids are kept as strings).
# The original literal listed "contUSA" twice ("2" and "3"); the second entry
# silently overwrote the first, leaving no region with id "2" and only 13
# entries. The third region is named "CentName" in the column list used for
# the data table, so that key is restored here.
regions_dict = {
    "Bor_NAme": "1", "contUSA": "2", "CentName": "3",
    "Trop_SAme": "4", "Temp_SAme": "5", "NAfr": "6",
    "SAfr": "7", "Russia": "8", "Oceania": "9",
    "Europe": "10", "China": "11", "India": "12",
    "SE_Asia": "13", "Temp_Eurasia_Japan": "14"
}
In [3]:
# Load the tab-separated input table.
# The first physical line of the file is skipped and no line is used as a
# header, so the columns get default integer labels.
df = pd.read_csv(
    'test.csv',
    header=None,
    skiprows=[0],
    sep="\t",
)
df
Out[3]:
In [4]:
# Drop the row labelled 0 and give the remaining table explicit column names.
# `.ix` is deprecated and was removed in pandas 1.0; `df` carries the default
# integer row labels from read_csv(header=None), so `.loc[1:]` is the exact
# label-based equivalent. `.copy()` makes df_TD independent of `df`, so
# renaming its columns never touches (or warns about) the parent frame.
# NOTE(review): the six names must match the number of columns in the file.
df_TD = df.loc[1:].copy()
df_TD.columns = ["proc", "stats", "Bor_NAme", "contUSA", "CentName", "Trop_SAme"]
df_TD
Out[4]:
In [5]:
# Create dataframe from flux file
#xl_TD = pd.ExcelFile("Sankey_TD_2003-2012_25MAy2016.xls")
#xl_TD.sheet_names
#df_TD = xl_TD.parse("Sankey_TD_2003-2012_25MAy2016", skiprows=[0])
#df_TD.head(10)
In [6]:
# Reshape df_TD to one row per 'proc': every remaining column is split into
# one sub-column per distinct 'stats' label, and reset_index() turns 'proc'
# back into an ordinary column.
# Reference for the pattern:
# http://stackoverflow.com/questions/29942167/transposing-one-column-in-python-pandas-with-the-simplest-index-possible
df_TD_piv = (
    df_TD
    .pivot(index='proc', columns='stats')
    .reset_index()
)
df_TD_piv
Out[6]:
In [7]:
# Pivoted table used by the rest of the notebook: rows are indexed by 'proc',
# columns are (data column, stats label) pairs.
# The arguments are passed by keyword because DataFrame.pivot no longer
# accepts them positionally in pandas 2.0 (they are keyword-only); the
# keyword form behaves the same on older pandas versions.
test = df_TD.pivot(index='proc', columns='stats')
test
Out[7]:
In [8]:
# Peek at the first-level label of every third pivoted column.
# The trailing names are the labels expected when each data column contributes
# three stats sub-columns (assumption - TODO confirm against the input file).
# (Column values themselves can be inspected with test.iloc[:, 3].values.)
test.columns.get_level_values(0)[0]  # Bor_NAme
test.columns.get_level_values(0)[3]  # contUSA
test.columns.get_level_values(0)[6]  # CentName
test.columns.get_level_values(0)[9]  # Trop_SAme
Out[8]:
In [49]:
# 'mean' sub-column of the Bor_NAme block, one entry per 'proc' row.
test['Bor_NAme'].loc[:, 'mean']
Out[49]:
In [58]:
# Single cell lookup: row 'Agriwast', column ('Bor_NAme', 'mean').
test.loc['Agriwast', ('Bor_NAme', 'mean')]
Out[58]:
In [59]:
# Single cell lookup: row 'BioBurBiof', column ('Bor_NAme', 'mean').
test.loc['BioBurBiof', ('Bor_NAme', 'mean')]
Out[59]:
In [28]:
#Store region names in list
numRegions = test.shape[1] / 3
idx = 0
sources = []
for num in range(0,numRegions):
sources.append(test.columns[idx][0])
idx = idx + 3
sources
Out[28]:
In [41]:
#Get target list
targets = test.index.tolist()
targets
Out[41]:
In [42]:
# Node list for the diagram: all sources first, followed by all targets.
nodes = list(sources)
nodes.extend(targets)
nodes
Out[42]:
In [60]:
# Sanity check: print each source, then every target with the 'mean' value
# stored for that (source, target) pair.
# print(...) with a single argument behaves the same on Python 2 and 3; the
# original print statements are a SyntaxError on Python 3. The unused `idx`
# counter was dropped.
for source in sources:
    print(source)
    for target in targets:
        print(target)
        print(test[source].loc[target]['mean'])
In [66]:
# Write the Sankey description (a "nodes" list and a "links" list) to a JSON
# file, one object per line.
#
# Changes from the original cell:
#  * Entries are joined with ",\n" instead of writing a trailing comma and
#    then removing it with seek(-2, os.SEEK_END) + truncate(). That trick
#    raises on Python 3 text-mode files (non-zero end-relative seeks are not
#    supported) and, when a list is empty, deletes the opening "[\n" instead.
#    For non-empty lists the bytes written are identical to the original.
#  * The file is opened in a `with` block so it is closed even if a value
#    cannot be converted, and the handle no longer shadows the `file` builtin.
#  * The per-link debug prints were removed from the writer.
#
# NOTE(review): the output location is an absolute, user-specific path kept
# from the original; consider making it configurable.
output_path = '/homel/cnangini/PROJECTS/MethaneEmissions/sankey_TB_means.json'

# One '{"name": ...}' object per node.
node_entries = ['{"name": "%s"}' % (node,) for node in nodes]

# One link per (source, target) pair; the mean is stored as a string with two
# decimals, exactly as before.
link_entries = [
    '{"source": "%s", "target": "%s", "mean": "%.2f"}'
    % (source, target, float(test[source].loc[target]['mean']))
    for source in sources
    for target in targets
]

with open(output_path, 'w') as out:
    out.write('{\n')
    out.write('"nodes": [\n')
    out.write(',\n'.join(node_entries))
    out.write('\n],\n')
    out.write('"links": [\n')
    out.write(',\n'.join(link_entries))
    out.write('\n]\n')
    out.write('}\n')
In [ ]: