In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
In [2]:
# Build the row index: one label per season running from 1 November of a
# year to 31 March of the following year, for start years 2002 through 2015.
times = ['%d-11-01 to %d-03-31' % (year, year + 1) for year in range(2002, 2016)]
print(times)
In [3]:
# tags: selecting-from-multi-index-pandas
# http://pandas.pydata.org/pandas-docs/stable/advanced.html
# Two-level column labels: every frequency paired with both methods, the two
# methods sitting side by side under each frequency.
freqs = ['2D', '3D', '4D', '5D', '6D', '7D', '8D', '8DOC', '9D', '15D']
methods = ['Xarray', 'Interp']
arrays = [np.repeat(freqs, len(methods)),  # 2D, 2D, 3D, 3D, ...
          np.tile(methods, len(freqs))]    # Xarray, Interp, Xarray, Interp, ...
# Size the zero-filled table from the labels instead of a hard-coded shape,
# so it cannot drift out of sync with `times` or the column lists.
df = pd.DataFrame(np.zeros((len(times), len(arrays[0]))), index=times, columns=arrays)
df.columns.names = ['freq', 'method']
# Selection example: df.xs('2D', level='freq', axis=1) returns both method
# columns for one frequency.
print(df)
In [4]:
# Holds each list of counts under a string key such as 'xarray_2d'.
data_dict = dict()
In [18]:
# using the xarray interpolation
count_2d_xarray = [82, 1, 9, 76, 35, 132, 32, 46, 3, 37, 0, 176, 126, 50] # 805
# data_dict already exists (created empty in an earlier cell); add the entry
# instead of rebinding the name, so re-running this cell never discards
# entries stored by other cells.
data_dict['xarray_2d'] = count_2d_xarray
# Fill the single ('2D', 'Xarray') column, addressed by its (freq, method) label.
df.loc[:, ('2D', 'Xarray')] = data_dict['xarray_2d']
#df
In [19]:
# using the new multidimensional interpolation
count_2d_interp = [196, 2, 30, 219, 142, 401, 90, 154, 16, 122, 0, 404, 305, 155] # 2236
data_dict['interp_2d'] = count_2d_interp
# Fill the single ('2D', 'Interp') column, addressed by its (freq, method) label.
df.loc[:, ('2D', 'Interp')] = data_dict['interp_2d']
#df
In [20]:
count_3d_xarray = [76, 0, 5, 65, 38, 108, 35, 44, 3, 36, 0, 160, 119, 50] # 739
data_dict['xarray_3d'] = count_3d_xarray
# Fill the single ('3D', 'Xarray') column, addressed by its (freq, method) label.
df.loc[:, ('3D', 'Xarray')] = data_dict['xarray_3d']
#df
In [21]:
count_3d_interp = [167, 2, 32, 179, 131, 346, 81, 132, 21, 113, 0, 313, 259, 135] # 1911
# Add to data_dict rather than rebinding it: assigning a fresh dict here
# would throw away every entry stored by the preceding cells.
data_dict['interp_3d'] = count_3d_interp
# Fill the single ('3D', 'Interp') column, addressed by its (freq, method) label.
df.loc[:, ('3D', 'Interp')] = data_dict['interp_3d']
#df
In [22]:
count_4d_xarray = [66, 0, 9, 67, 30, 126, 36, 55, 1, 40, 0, 169, 118, 57] # 774
data_dict['xarray_4d'] = count_4d_xarray
# Fill the single ('4D', 'Xarray') column, addressed by its (freq, method) label.
df.loc[:, ('4D', 'Xarray')] = data_dict['xarray_4d']
In [23]:
count_4d_interp = [147, 3, 32, 166, 105, 314, 75, 119, 15, 97, 0, 266, 222, 115] # 1676
data_dict['interp_4d'] = count_4d_interp
# Fill the single ('4D', 'Interp') column, addressed by its (freq, method) label.
df.loc[:, ('4D', 'Interp')] = data_dict['interp_4d']
#df
In [24]:
count_5d_xarray = [73, 1, 6, 71, 39, 120, 30, 45, 5, 46, 0, 153, 105, 60] # 754
data_dict['xarray_5d'] = count_5d_xarray
# Fill the single ('5D', 'Xarray') column, addressed by its (freq, method) label.
df.loc[:, ('5D', 'Xarray')] = data_dict['xarray_5d']
In [25]:
count_5d_interp = [128, 3, 31, 145, 88, 275, 62, 103, 17, 89, 0, 222, 180, 101] # 1444
data_dict['interp_5d'] = count_5d_interp
# Fill the single ('5D', 'Interp') column, addressed by its (freq, method) label.
df.loc[:, ('5D', 'Interp')] = data_dict['interp_5d']
#df
In [28]:
count_6d_xarray = [71, 1, 7, 75, 51, 119, 27, 55, 7, 45, 0, 125, 110, 52] # 745
data_dict['xarray_6d'] = count_6d_xarray
# Fill the single ('6D', 'Xarray') column, addressed by its (freq, method) label.
df.loc[:, ('6D', 'Xarray')] = data_dict['xarray_6d']
In [29]:
count_6d_interp = [109, 1, 27, 127, 94, 232, 57, 97, 14, 81, 0, 190, 171, 85] # 1285
data_dict['interp_6d'] = count_6d_interp
# Fill the single ('6D', 'Interp') column, addressed by its (freq, method) label.
df.loc[:, ('6D', 'Interp')] = data_dict['interp_6d']
#df
In [30]:
count_7d_xarray = [63, 0, 6, 72, 40, 122, 28, 58, 6, 44, 0, 118, 110, 56] # 723
data_dict['xarray_7d'] = count_7d_xarray
# Fill the single ('7D', 'Xarray') column, addressed by its (freq, method) label.
df.loc[:, ('7D', 'Xarray')] = data_dict['xarray_7d']
In [31]:
count_7d_interp = [96, 1, 29, 120, 83, 221, 50, 82, 11, 67, 0, 166, 155, 80] # 1161
data_dict['interp_7d'] = count_7d_interp
# Fill the single ('7D', 'Interp') column, addressed by its (freq, method) label.
df.loc[:, ('7D', 'Interp')] = data_dict['interp_7d']
#df
In [32]:
count_8d_xarray = [56, 0, 11, 74, 47, 109, 31, 49, 3, 35, 0, 125, 97, 55] # 692
data_dict['xarray_8d'] = count_8d_xarray
# Fill the single ('8D', 'Xarray') column, addressed by its (freq, method) label.
df.loc[:, ('8D', 'Xarray')] = data_dict['xarray_8d']
In [33]:
count_8d_interp = [89, 1, 24, 109, 79, 219, 44, 82, 11, 63, 0, 157, 140, 71] # 1089
data_dict['interp_8d'] = count_8d_interp
# Fill the single ('8D', 'Interp') column, addressed by its (freq, method) label.
df.loc[:, ('8D', 'Interp')] = data_dict['interp_8d']
#df
In [34]:
# directly using OC data, no interpolation involved
count_8d_OC_xarray = [56, 0, 11, 73, 44, 108, 29, 51, 1, 38, 0, 120, 97, 55] # 683
data_dict['xarray_8d_oc'] = count_8d_OC_xarray
# Fill the single ('8DOC', 'Xarray') column, addressed by its (freq, method) label.
df.loc[:, ('8DOC', 'Xarray')] = data_dict['xarray_8d_oc']
In [35]:
count_8d_OC_interp = [87, 0, 24, 108, 80, 213, 43, 74, 11, 59, 0, 157, 143, 68] # 1067
data_dict['interp_8d_oc'] = count_8d_OC_interp
# Fill the single ('8DOC', 'Interp') column, addressed by its (freq, method) label.
df.loc[:, ('8DOC', 'Interp')] = data_dict['interp_8d_oc']
In [37]:
count_9d_xarray = [58, 1, 9, 66, 44, 129, 28, 50, 5, 32, 0, 106, 91, 53] # 672
data_dict['xarray_9d'] = count_9d_xarray
# Fill the single ('9D', 'Xarray') column, addressed by its (freq, method) label.
df.loc[:, ('9D', 'Xarray')] = data_dict['xarray_9d']
In [38]:
count_9d_interp = [82, 2, 19, 93, 69, 199, 41, 70, 9, 61, 0, 136, 130, 66] # 977
data_dict['interp_9d'] = count_9d_interp
# Fill the single ('9D', 'Interp') column, addressed by its (freq, method) label.
df.loc[:, ('9D', 'Interp')] = data_dict['interp_9d']
In [39]:
count_15d_xarray = [43, 1, 13, 46, 34, 94, 19, 43, 3, 30, 0, 73, 67, 34] # 500
data_dict['xarray_15d'] = count_15d_xarray
# Fill the single ('15D', 'Xarray') column, addressed by its (freq, method) label.
df.loc[:, ('15D', 'Xarray')] = data_dict['xarray_15d']
In [40]:
count_15d_interp = [51, 1, 15, 62, 46, 124, 26, 46, 7, 40, 0, 80, 82, 39] # 619
data_dict['interp_15d'] = count_15d_interp
# Fill the single ('15D', 'Interp') column, addressed by its (freq, method) label.
df.loc[:, ('15D', 'Interp')] = data_dict['interp_15d']
# Show the table once the last column has been written.
print(df)
In [41]:
#print(data_dict)
In [42]:
# Disabled scratch code: the statements below sit inside a string literal, so
# none of them run. The text builds a list of 'count_<n>d' names for n in
# 2..9 and appends 'count_8d_DS'.
'''
base = 'count_'
col_name = []
for i in range(2,10):
tmp = base+'%s' % i
tmp = tmp+ 'd'
col_name.append(tmp)
#print(col_name)
final = base +'8d_DS' # add one more from the dataset directly
col_name.append(final)
print(col_name)
'''
Out[42]:
In [43]:
# Disabled scratch code: kept inside a string literal, so it does not run.
# The text would build a DataFrame from data_dict indexed by times.
'''
data_fm= pd.DataFrame(data=data_dict, index = times)
data_fm
'''
Out[43]:
In [44]:
# Sum each (freq, method) column over all rows.
# * the 2d resampling seems to bring us most data points.
# * the multidimensional interpolation approach brings us more data points.
column_totals = df.sum(axis=0)
column_totals
Out[44]:
In [ ]: