Statistics on the resolution of our experiment

Use a DataFrame to store the number of data points for each subsampling frequency.


In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Row labels for the count table: one "Nov 1 to Mar 31" period per start year 2002..2015.
times = ['%d-11-01 to %d-03-31' % (start_year, start_year + 1)
         for start_year in range(2002, 2016)]

print(times)


['2002-11-01 to 2003-03-31', '2003-11-01 to 2004-03-31', '2004-11-01 to 2005-03-31', '2005-11-01 to 2006-03-31', '2006-11-01 to 2007-03-31', '2007-11-01 to 2008-03-31', '2008-11-01 to 2009-03-31', '2009-11-01 to 2010-03-31', '2010-11-01 to 2011-03-31', '2011-11-01 to 2012-03-31', '2012-11-01 to 2013-03-31', '2013-11-01 to 2014-03-31', '2014-11-01 to 2015-03-31', '2015-11-01 to 2016-03-31']

In [3]:
# tags: selecting-from-multi-index-pandas
#       http://pandas.pydata.org/pandas-docs/stable/advanced.html

# Two-level column labels: every subsampling frequency paired with both methods,
# 'Xarray' first and 'Interp' second within each frequency.
freqs = ['2D', '3D', '4D', '5D', '6D', '7D', '8D', '8DOC', '9D', '15D']
methods = ['Xarray', 'Interp']

arrays = [
    np.array([freq for freq in freqs for _ in methods]),  # '2D', '2D', '3D', '3D', ...
    np.array(methods * len(freqs)),                       # 'Xarray', 'Interp', 'Xarray', ...
]

# 14 periods (rows) x 20 (freq, method) columns, zero-filled until counts are written in.
df = pd.DataFrame(np.zeros((14, 20)), index=times, columns=arrays)
df.columns.names = ['freq', 'method']

# Selection experiments kept for reference:
#df.iloc[df.columns.get_level_values('freq')== '2D']
#print(df.xs('2D', level ='freq',axis=1))
#df.iloc[:, df.columns.get_level_values('freq') == '2D']= np.zeros((14,2))+1
print(df)


freq                         2D            3D            4D            5D  \
method                   Xarray Interp Xarray Interp Xarray Interp Xarray   
2002-11-01 to 2003-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2003-11-01 to 2004-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2004-11-01 to 2005-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2005-11-01 to 2006-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2006-11-01 to 2007-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2007-11-01 to 2008-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2008-11-01 to 2009-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2009-11-01 to 2010-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2010-11-01 to 2011-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2011-11-01 to 2012-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2012-11-01 to 2013-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2013-11-01 to 2014-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2014-11-01 to 2015-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2015-11-01 to 2016-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   

freq                                6D            7D            8D         \
method                   Interp Xarray Interp Xarray Interp Xarray Interp   
2002-11-01 to 2003-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2003-11-01 to 2004-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2004-11-01 to 2005-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2005-11-01 to 2006-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2006-11-01 to 2007-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2007-11-01 to 2008-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2008-11-01 to 2009-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2009-11-01 to 2010-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2010-11-01 to 2011-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2011-11-01 to 2012-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2012-11-01 to 2013-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2013-11-01 to 2014-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2014-11-01 to 2015-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
2015-11-01 to 2016-03-31    0.0    0.0    0.0    0.0    0.0    0.0    0.0   

freq                       8DOC            9D           15D         
method                   Xarray Interp Xarray Interp Xarray Interp  
2002-11-01 to 2003-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2003-11-01 to 2004-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2004-11-01 to 2005-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2005-11-01 to 2006-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2006-11-01 to 2007-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2007-11-01 to 2008-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2008-11-01 to 2009-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2009-11-01 to 2010-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2010-11-01 to 2011-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2011-11-01 to 2012-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2012-11-01 to 2013-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2013-11-01 to 2014-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2014-11-01 to 2015-03-31    0.0    0.0    0.0    0.0    0.0    0.0  
2015-11-01 to 2016-03-31    0.0    0.0    0.0    0.0    0.0    0.0  

In [4]:
# Registry of the raw count lists, keyed '<method>_<freq>'; filled in by the cells below.
data_dict = dict()

In [18]:
# using the xarray interpolation
# '2D' subsampling: data-point counts, one per period in `times` (total 805).
count_2d_xarray = [82, 1, 9, 76, 35, 132, 32, 46, 3, 37, 0, 176, 126, 50]  # 805

# Insert by key instead of rebinding `data_dict`: the dict already exists (it may be
# empty), and rebinding would throw away every entry stored by other cells whenever
# this cell is re-run.
data_dict['xarray_2d'] = count_2d_xarray

# Boolean mask over both column levels selects the single ('2D', 'Xarray') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '2D') & (method_level == 'Xarray')] = data_dict['xarray_2d']

In [19]:
# using the new multidimensional interpolation
# '2D' subsampling: data-point counts, one per period in `times` (total 2236).
count_2d_interp = [196, 2, 30, 219, 142, 401, 90, 154, 16, 122, 0, 404, 305, 155]  # 2236
data_dict['interp_2d'] = count_2d_interp

# Boolean mask over both column levels selects the single ('2D', 'Interp') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '2D') & (method_level == 'Interp')] = data_dict['interp_2d']

In [20]:
# Counts for the ('3D', 'Xarray') column, one per period in `times` (total 739).
count_3d_xarray = [76, 0, 5, 65, 38, 108, 35, 44, 3, 36, 0, 160, 119, 50] # 739
data_dict['xarray_3d'] = count_3d_xarray

# Boolean mask over both column levels selects the single ('3D', 'Xarray') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '3D') & (method_level == 'Xarray')] = data_dict['xarray_3d']

In [21]:
# Counts for the ('3D', 'Interp') column, one per period in `times` (total 1911).
count_3d_interp = [167, 2, 32, 179, 131, 346, 81, 132, 21, 113, 0, 313, 259, 135]  # 1911

# Add to the existing registry. Rebinding `data_dict` to a fresh one-entry dict here
# would silently drop the 'xarray_2d', 'interp_2d' and 'xarray_3d' entries stored by
# the preceding cells.
data_dict.update({'interp_3d': count_3d_interp})

# Boolean mask over both column levels selects the single ('3D', 'Interp') column.
df.iloc[:, (df.columns.get_level_values('freq') == '3D')
        & (df.columns.get_level_values('method') == 'Interp')] = data_dict['interp_3d']

In [22]:
# Counts for the ('4D', 'Xarray') column, one per period in `times` (total 774).
count_4d_xarray = [66, 0, 9, 67, 30, 126, 36, 55, 1, 40, 0, 169, 118, 57] # 774
data_dict['xarray_4d'] = count_4d_xarray

# Boolean mask over both column levels selects the single ('4D', 'Xarray') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '4D') & (method_level == 'Xarray')] = data_dict['xarray_4d']

In [23]:
# Counts for the ('4D', 'Interp') column, one per period in `times` (total 1676).
count_4d_interp = [147, 3, 32, 166, 105, 314, 75, 119, 15, 97, 0, 266, 222, 115] # 1676
data_dict['interp_4d'] = count_4d_interp

# Boolean mask over both column levels selects the single ('4D', 'Interp') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '4D') & (method_level == 'Interp')] = data_dict['interp_4d']

In [24]:
# Counts for the ('5D', 'Xarray') column, one per period in `times` (total 754).
count_5d_xarray = [73, 1, 6, 71, 39, 120, 30, 45, 5, 46, 0, 153, 105, 60] # 754
data_dict['xarray_5d'] = count_5d_xarray

# Boolean mask over both column levels selects the single ('5D', 'Xarray') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '5D') & (method_level == 'Xarray')] = data_dict['xarray_5d']

In [25]:
# Counts for the ('5D', 'Interp') column, one per period in `times` (total 1444).
count_5d_interp = [128, 3, 31, 145, 88, 275, 62, 103, 17, 89, 0, 222, 180, 101] # 1444
data_dict['interp_5d'] = count_5d_interp

# Boolean mask over both column levels selects the single ('5D', 'Interp') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '5D') & (method_level == 'Interp')] = data_dict['interp_5d']

In [28]:
# Counts for the ('6D', 'Xarray') column, one per period in `times` (total 745).
count_6d_xarray = [71, 1, 7, 75, 51, 119, 27, 55, 7, 45, 0, 125, 110, 52] # 745
data_dict['xarray_6d'] = count_6d_xarray

# Boolean mask over both column levels selects the single ('6D', 'Xarray') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '6D') & (method_level == 'Xarray')] = data_dict['xarray_6d']

In [29]:
# Counts for the ('6D', 'Interp') column, one per period in `times` (total 1285).
count_6d_interp = [109, 1, 27, 127, 94, 232, 57, 97, 14, 81, 0, 190, 171, 85] # 1285
data_dict['interp_6d'] = count_6d_interp

# Boolean mask over both column levels selects the single ('6D', 'Interp') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '6D') & (method_level == 'Interp')] = data_dict['interp_6d']

In [30]:
# Counts for the ('7D', 'Xarray') column, one per period in `times` (total 723).
count_7d_xarray = [63, 0, 6, 72, 40, 122, 28, 58, 6, 44, 0, 118, 110, 56]   # 723
data_dict['xarray_7d'] = count_7d_xarray

# Boolean mask over both column levels selects the single ('7D', 'Xarray') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '7D') & (method_level == 'Xarray')] = data_dict['xarray_7d']

In [31]:
# Counts for the ('7D', 'Interp') column, one per period in `times` (total 1161).
count_7d_interp = [96, 1, 29, 120, 83, 221, 50, 82, 11, 67, 0, 166, 155, 80]   # 1161
data_dict['interp_7d'] = count_7d_interp

# Boolean mask over both column levels selects the single ('7D', 'Interp') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '7D') & (method_level == 'Interp')] = data_dict['interp_7d']

In [32]:
# Counts for the ('8D', 'Xarray') column, one per period in `times` (total 692).
count_8d_xarray = [56, 0, 11, 74, 47, 109, 31, 49, 3, 35, 0, 125, 97, 55] #  692
data_dict['xarray_8d'] = count_8d_xarray

# Boolean mask over both column levels selects the single ('8D', 'Xarray') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '8D') & (method_level == 'Xarray')] = data_dict['xarray_8d']

In [33]:
# Counts for the ('8D', 'Interp') column, one per period in `times` (total 1089).
count_8d_interp = [89, 1, 24, 109, 79, 219, 44, 82, 11, 63, 0, 157, 140, 71] # 1089
data_dict['interp_8d'] = count_8d_interp

# Boolean mask over both column levels selects the single ('8D', 'Interp') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '8D') & (method_level == 'Interp')] = data_dict['interp_8d']

In [34]:
# directly using OC data, no interpolation involved
# Counts for the ('8DOC', 'Xarray') column, one per period in `times` (total 683).
count_8d_OC_xarray = [56, 0, 11, 73, 44, 108, 29, 51, 1, 38, 0, 120, 97, 55] # 683
data_dict['xarray_8d_oc'] = count_8d_OC_xarray

# Boolean mask over both column levels selects the single ('8DOC', 'Xarray') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '8DOC') & (method_level == 'Xarray')] = data_dict['xarray_8d_oc']

In [35]:
# Counts for the ('8DOC', 'Interp') column, one per period in `times` (total 1067).
count_8d_OC_interp = [87, 0, 24, 108, 80, 213, 43, 74, 11, 59, 0, 157, 143, 68] # 1067
data_dict['interp_8d_oc'] = count_8d_OC_interp

# Boolean mask over both column levels selects the single ('8DOC', 'Interp') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '8DOC') & (method_level == 'Interp')] = data_dict['interp_8d_oc']

In [37]:
# Counts for the ('9D', 'Xarray') column, one per period in `times` (total 672).
count_9d_xarray = [58, 1, 9, 66, 44, 129, 28, 50, 5, 32, 0, 106, 91, 53]   # 672
data_dict['xarray_9d'] = count_9d_xarray

# Boolean mask over both column levels selects the single ('9D', 'Xarray') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '9D') & (method_level == 'Xarray')] = data_dict['xarray_9d']

In [38]:
# Counts for the ('9D', 'Interp') column, one per period in `times` (total 977).
count_9d_interp = [82, 2, 19, 93, 69, 199, 41, 70, 9, 61, 0, 136, 130, 66]   # 977
data_dict['interp_9d'] = count_9d_interp

# Boolean mask over both column levels selects the single ('9D', 'Interp') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '9D') & (method_level == 'Interp')] = data_dict['interp_9d']

In [39]:
# Counts for the ('15D', 'Xarray') column, one per period in `times` (total 500).
count_15d_xarray = [43, 1, 13, 46, 34, 94, 19, 43, 3, 30, 0, 73, 67, 34]   # 500
data_dict['xarray_15d'] = count_15d_xarray

# Boolean mask over both column levels selects the single ('15D', 'Xarray') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '15D') & (method_level == 'Xarray')] = data_dict['xarray_15d']

In [40]:
# Counts for the ('15D', 'Interp') column, one per period in `times` (total 619).
count_15d_interp = [51, 1, 15, 62, 46, 124, 26, 46, 7, 40, 0, 80, 82, 39]   # 619
data_dict['interp_15d'] = count_15d_interp

# Boolean mask over both column levels selects the single ('15D', 'Interp') column.
freq_level = df.columns.get_level_values('freq')
method_level = df.columns.get_level_values('method')
df.iloc[:, (freq_level == '15D') & (method_level == 'Interp')] = data_dict['interp_15d']

# Last column written: show the table.
print(df)


freq                         2D            3D            4D            5D  \
method                   Xarray Interp Xarray Interp Xarray Interp Xarray   
2002-11-01 to 2003-03-31     82    196     76    167     66    147     73   
2003-11-01 to 2004-03-31      1      2      0      2      0      3      1   
2004-11-01 to 2005-03-31      9     30      5     32      9     32      6   
2005-11-01 to 2006-03-31     76    219     65    179     67    166     71   
2006-11-01 to 2007-03-31     35    142     38    131     30    105     39   
2007-11-01 to 2008-03-31    132    401    108    346    126    314    120   
2008-11-01 to 2009-03-31     32     90     35     81     36     75     30   
2009-11-01 to 2010-03-31     46    154     44    132     55    119     45   
2010-11-01 to 2011-03-31      3     16      3     21      1     15      5   
2011-11-01 to 2012-03-31     37    122     36    113     40     97     46   
2012-11-01 to 2013-03-31      0      0      0      0      0      0      0   
2013-11-01 to 2014-03-31    176    404    160    313    169    266    153   
2014-11-01 to 2015-03-31    126    305    119    259    118    222    105   
2015-11-01 to 2016-03-31     50    155     50    135     57    115     60   

freq                                6D            7D            8D         \
method                   Interp Xarray Interp Xarray Interp Xarray Interp   
2002-11-01 to 2003-03-31    128     71    109     63     96     56     89   
2003-11-01 to 2004-03-31      3      1      1      0      1      0      1   
2004-11-01 to 2005-03-31     31      7     27      6     29     11     24   
2005-11-01 to 2006-03-31    145     75    127     72    120     74    109   
2006-11-01 to 2007-03-31     88     51     94     40     83     47     79   
2007-11-01 to 2008-03-31    275    119    232    122    221    109    219   
2008-11-01 to 2009-03-31     62     27     57     28     50     31     44   
2009-11-01 to 2010-03-31    103     55     97     58     82     49     82   
2010-11-01 to 2011-03-31     17      7     14      6     11      3     11   
2011-11-01 to 2012-03-31     89     45     81     44     67     35     63   
2012-11-01 to 2013-03-31      0      0      0      0      0      0      0   
2013-11-01 to 2014-03-31    222    125    190    118    166    125    157   
2014-11-01 to 2015-03-31    180    110    171    110    155     97    140   
2015-11-01 to 2016-03-31    101     52     85     56     80     55     71   

freq                       8DOC            9D           15D         
method                   Xarray Interp Xarray Interp Xarray Interp  
2002-11-01 to 2003-03-31     56     87     58     82     43     51  
2003-11-01 to 2004-03-31      0      0      1      2      1      1  
2004-11-01 to 2005-03-31     11     24      9     19     13     15  
2005-11-01 to 2006-03-31     73    108     66     93     46     62  
2006-11-01 to 2007-03-31     44     80     44     69     34     46  
2007-11-01 to 2008-03-31    108    213    129    199     94    124  
2008-11-01 to 2009-03-31     29     43     28     41     19     26  
2009-11-01 to 2010-03-31     51     74     50     70     43     46  
2010-11-01 to 2011-03-31      1     11      5      9      3      7  
2011-11-01 to 2012-03-31     38     59     32     61     30     40  
2012-11-01 to 2013-03-31      0      0      0      0      0      0  
2013-11-01 to 2014-03-31    120    157    106    136     73     80  
2014-11-01 to 2015-03-31     97    143     91    130     67     82  
2015-11-01 to 2016-03-31     55     68     53     66     34     39  

In [41]:
#print(data_dict)

In [42]:
# Disabled scratch code, kept as a bare string literal: nothing inside it executes,
# and the notebook only echoes the string back as this cell's output.
# The quoted code builds a list of 'count_<n>d' names for n = 2..9 plus 'count_8d_DS'.
'''
base = 'count_'
col_name = []
for i in range(2,10):
    tmp = base+'%s' % i
    tmp = tmp+ 'd'
    col_name.append(tmp)
#print(col_name)

final = base +'8d_DS' # add one more from the dataset directly
col_name.append(final)
print(col_name)
'''


Out[42]:
"\nbase = 'count_'\ncol_name = []\nfor i in range(2,10):\n    tmp = base+'%s' % i\n    tmp = tmp+ 'd'\n    col_name.append(tmp)\n#print(col_name)\n\nfinal = base +'8d_DS' # add one more from the dataset directly\ncol_name.append(final)\nprint(col_name)\n"

In [43]:
# Disabled scratch code, kept as a bare string literal: nothing inside it executes,
# and the notebook only echoes the string back as this cell's output.
# The quoted code would build a flat DataFrame from `data_dict`, indexed by `times`.
'''
data_fm= pd.DataFrame(data=data_dict, index = times)
data_fm
'''


Out[43]:
'\ndata_fm= pd.DataFrame(data=data_dict, index = times)\ndata_fm\n'

In [44]:
# Total data points per (freq, method) column, summed over all periods.
#   * the 2D resampling seems to bring us the most data points.
#   * the multidimensional interpolation approach brings us more data points.
df.sum(axis='index')


Out[44]:
freq  method
2D    Xarray     805
      Interp    2236
3D    Xarray     739
      Interp    1911
4D    Xarray     774
      Interp    1676
5D    Xarray     754
      Interp    1444
6D    Xarray     745
      Interp    1285
7D    Xarray     723
      Interp    1161
8D    Xarray     692
      Interp    1089
8DOC  Xarray     683
      Interp    1067
9D    Xarray     672
      Interp     977
15D   Xarray     500
      Interp     619
dtype: int64

In [ ]: