notebook.community

Edit and run



In [36]:

    
#Script to reformat methane data files for two-level Sankey
#Output: tsv file

#Created:           30.05.2016
#Last modified:



In [1]:

    
import numpy as np
import pandas as pd
import collections
import os
import xlrd



In [2]:

    
regions_dict = {
              "Bor_NAme": "1", "contUSA": "2", "contUSA": "3",
              "Trop_SAme": "4", "Temp_SAme": "5", "NAfr": "6",
              "SAfr": "7", "Russia": "8", "Oceania": "9",
              "Europe": "10", "China": "11", "India": "12",
              "SE_Asia": "13", "Temp_Eurasia_Japan": "14"
            };



In [3]:

    
df = pd.read_csv('test.csv', sep="\t", skiprows=[0], header = None)
#dataTD.columns = ["a", "b", "c", "d", "etc."]
df



In [4]:

    
df_TD = df.ix[1:]
df_TD.columns = ["proc", "stats", "Bor_NAme", "contUSA", "CentName", "Trop_SAme"]
df_TD



In [5]:

    
# Create dataframe from flux file

#xl_TD = pd.ExcelFile("Sankey_TD_2003-2012_25MAy2016.xls")
#xl_TD.sheet_names
#df_TD = xl.parse("Sankey_TD_2003-2012_25MAy2016",  skiprows=[0])
#df_TD.head(10)



In [6]:

    
#Pivot table so that Region names are listed in one Region column, and stats Mean, Min, Max become three columns
#http://stackoverflow.com/questions/29942167/transposing-one-column-in-python-pandas-with-the-simplest-index-possible
#dc.pivot(columns='disease_header', index='medicine', values='disease').reset_index()

df_TD_piv = df_TD.pivot(columns='stats', index='proc').reset_index()
#df05_piv = df05_piv.drop(['CLASS'], 1)
df_TD_piv



In [7]:

    
test = df_TD.pivot('proc', 'stats')
test



In [8]:

    
#test.iloc[:,3].values #col values
test.columns[0][0] #Bor_NAme
test.columns[3][0] #contUSA
test.columns[6][0] #CentName
test.columns[9][0] #Trop_SAme









    Out[8]:





'Trop_SAme'



In [49]:

    
test['Bor_NAme']['mean']









    Out[49]:





proc
Agriwast       3
BioBurBiof     1
Fossil         2
OtherNat       2
Wetlands      13
Name: mean, dtype: object



In [58]:

    
test['Bor_NAme'].loc['Agriwast']['mean']









    Out[58]:





'3'



In [59]:

    
test['Bor_NAme'].loc['BioBurBiof']['mean']









    Out[59]:





'1'



In [28]:

    
#Store region names in list
numRegions = test.shape[1] / 3

idx = 0
sources = []
for num in range(0,numRegions):
    sources.append(test.columns[idx][0])
    idx = idx + 3
   
sources









    Out[28]:





['Bor_NAme', 'contUSA', 'CentName', 'Trop_SAme']



In [41]:

    
#Get target list
targets = test.index.tolist()
targets









    Out[41]:





['Agriwast', 'BioBurBiof', 'Fossil', 'OtherNat', 'Wetlands']



In [42]:

    
nodes = sources + targets
nodes









    Out[42]:





['Bor_NAme',
 'contUSA',
 'CentName',
 'Trop_SAme',
 'Agriwast',
 'BioBurBiof',
 'Fossil',
 'OtherNat',
 'Wetlands']



In [60]:

    
idx = 0
for source in sources:
    print source
    for target in targets:
        #print idx        
        print target
        print test[source].loc[target]['mean']









    



Bor_NAme
Agriwast
3
BioBurBiof
1
Fossil
2
OtherNat
2
Wetlands
13
contUSA
Agriwast
17
BioBurBiof
0
Fossil
11
OtherNat
2
Wetlands
8
CentName
Agriwast
5
BioBurBiof
1
Fossil
2
OtherNat
1
Wetlands
2
Trop_SAme
Agriwast
21
BioBurBiof
6
Fossil
5
OtherNat
7
Wetlands
47



In [66]:

    
file = open('/homel/cnangini/PROJECTS/MethaneEmissions/sankey_TB_means.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

for source in sources:
    print source
    for target in targets:
        print target
        print test[source].loc[target]['mean']
        value = test[source].loc[target]['mean']
        file.write('{"source": "%s", "target": "%s", "mean": "%.2f"},\n' %(source, target, float(value)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()









    



Bor_NAme
Agriwast
3
BioBurBiof
1
Fossil
2
OtherNat
2
Wetlands
13
contUSA
Agriwast
17
BioBurBiof
0
Fossil
11
OtherNat
2
Wetlands
8
CentName
Agriwast
5
BioBurBiof
1
Fossil
2
OtherNat
1
Wetlands
2
Trop_SAme
Agriwast
21
BioBurBiof
6
Fossil
5
OtherNat
7
Wetlands
47



In [ ]:

	0	1	2	3	4	5
0	proc	stats	Bor_NAme	contUSA	Cent_NAme	Trop_SAme
1	Wetlands	mean	13	8	2	47
2	Wetlands	min	7	6	2	30
3	Wetlands	max	21	9	2	61
4	OtherNat	mean	2	2	1	7
5	OtherNat	min	1	1	1	5
6	OtherNat	max	2	4	1	8
7	Agriwast	mean	3	17	5	21
8	Agriwast	min	2	15	6	19
9	Agriwast	max	3	23	7	24
10	Fossil	mean	2	11	2	5
11	Fossil	min	1	9	1	1
12	Fossil	max	2	15	3	8
13	BioBurBiof	mean	1	0	1	6
14	BioBurBiof	min	1	0	0	3
15	BioBurBiof	max	1	1	1	7