In [14]:
import os
import re
import pickle
import time
import datetime

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from scipy.sparse import csr_matrix, vstack

%matplotlib inline

# Custom modules
import const
import func

Load data


In [15]:
# Numeric data
num_dat = func.load_data_file('train_numeric')

y = num_dat['data']['y']
data = num_dat['data']['features']
ids = num_dat['data']['ids']
n_f_names = num_dat['data']['feature_names'][1:]

del num_dat


Returning <open file '/Volumes/My Book/kaggle_bosch/train_numeric.pkl', mode 'rb' at 0x12615b270>.pkl

In [16]:
# Load numeric data test
num_dat = func.load_data_file('test_numeric')
ids_test = num_dat['data']['ids']
data = vstack([data, num_dat['data']['features']], format='csr')
del num_dat


Returning <open file '/Volumes/My Book/kaggle_bosch/test_numeric.pkl', mode 'rb' at 0x12615b270>.pkl

In [17]:
ids_all = pd.concat([ids, ids_test], axis=0)

In [18]:
n_1 = y[y.Response==1].index.values
n_0 = y[y.Response==0].index.values

In [19]:
ids_test.head(3)


Out[19]:
Id
0 1
1 2
2 3

In [20]:
y = y.reindex(pd.concat([ids.Id, ids_test.Id]))

In [21]:
# Load look-up table
lut = pd.read_csv(const.LOOK_UP_TABLE)
lut.head(3)


Out[21]:
line station feature_nr feat_nr_dat name_dat name_cat name_num col_dat col_num col_cat station_V2 line_V2
0 0 0 0 1.0 L0_S0_D1 NaN L0_S0_F0 0.0 0.0 NaN 0.0 1.0
1 0 0 2 3.0 L0_S0_D3 NaN L0_S0_F2 1.0 1.0 NaN 0.0 1.0
2 0 0 4 5.0 L0_S0_D5 NaN L0_S0_F4 2.0 2.0 NaN 0.0 1.0

In [22]:
# Read start time per station (discard features related to t delta)
t_station = pd.read_csv(os.path.join(const.DATA_PATH, 'feat_set_date_station.csv')).iloc[:,:129]
t_station.head(3)


Out[22]:
Id t_0.0 t_1.0 t_2.0 t_3.0 t_4.0 t_5.0 t_6.0 t_7.0 t_8.0 ... t_42.0 t_43.0 t_44.0 t_45.0 t_46.0 t_47.0 t_48.0 t_49.0 t_50.0 t_51.0
0 4 8224.0 8224.0 8224.0 NaN 8226.0 NaN NaN 8226.0 8227.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 6 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 7 161870.0 161870.0 161870.0 NaN NaN 161872.0 161872.0 NaN 161873.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

3 rows × 129 columns


In [23]:
data.shape[0]


Out[23]:
2367495

Calculate lead/lag value per numeric feature


In [24]:
def calculate_lead_lag_numeric(ft_nr):

    # Get station number 
    station = lut[lut.feature_nr==ft_nr].station_V2.values[0]
    
    # Column nr in sparse array
    n_col = lut[lut.feature_nr==ft_nr].col_num.values[0]
    
    if not n_col>=0:
        return None
    
    # Samples with value for this feature
    i_n = data[:, n_col].nonzero()[0]
    
    # Timestamps for these values from station
    i_t = (~t_station.iloc[i_n, t_station.columns.get_loc('t_' + str(station))].isnull()).index.values

    # Samples with value and timestamp
    i = list(set(i_n) & set(i_t))
    
    # Get value, timestamp, and Response for each sample and combine in DataFrame
    v = data[i, n_col].todense().A1
    
    # Station timestamps were stored *100, so need to divide by 100 again
    t = 1.0/100*t_station.iloc[i, t_station.columns.get_loc('t_' + str(station))].values
    r = y.iloc[i]['Response'].values
    
    df = pd.DataFrame({'ID': ids_all.iloc[i].Id.values, 
                       't': t,
                       'v': v, 
                       'r':r})
    
    #df['t'] = df['t'].round(0)
    
    df.sort_values(['t','v','ID'], inplace=True)

    # Check nan in t (if large, adjust code for t)
    print('F nr: {} | Feature values: {} | Time values: {} | Nans found: {}'.format(ft_nr,
                                                                                    len(i_n),
                                                                                    len(i_t),
                                                                                    (np.isnan(t)).sum()))
    
    # Calculate lead and lag values...
    
    # Selected widths
    widths = [10, 20, 50, 100]
    
    # Use Pandas rolling windows to quickly generate lead and lag values
    # All are shifted by 1 to prevent that sample itself is included (leakage)
    for w in widths:
        df['next_{}_num_nan'.format(w)] = \
            df['r'][::-1].rolling(window=w, center=False, min_periods=1). \
                apply(lambda x: np.count_nonzero(np.isnan(x))).shift(1)
        df['next_{}_num_sum'.format(w)] = \
            df['r'][::-1].rolling(window=w, center=False, min_periods=1). \
                sum().shift(1)

        df['previous_{}_num_nan'.format(w)] = \
            df['r'].rolling(window=w, center=False, min_periods=1). \
                apply(lambda x: np.count_nonzero(np.isnan(x))).shift(1)
        df['previous_{}_num_sum'.format(w)] = \
            df['r'].rolling(window=w, center=False, min_periods=1). \
                sum().shift(1)
    
    # Group
    #df.groupby('r').filter.mean()
    
    return df

In [27]:
a = calculate_lead_lag_numeric(3857)


F nr: 3857 | Feature values: 2229189 | Time values: 2229189 | Nans found: 0

In [28]:
a=a[['ID','r','t','v']]
a.sort_values('ID', inplace=True)

a['t_prev'] = a['t'].shift(1)
a['t_next'] = a['t'].shift(-1)

a['r_prev'] = a['r'].shift(1)
a['r_next'] = a['r'].shift(-1)

a['v_prev'] = a['v'].shift(1)
a['v_next'] = a['v'].shift(-1)

In [59]:
a.sort_values(['t','v'], inplace=True)
a['ID_diff'] = a['ID'].diff()
a['ID_diff_rev'] = a['ID'][::-1].diff()
a.loc[a['ID_diff'].abs()>1, 'ID_diff'] = 9999
a.loc[a['ID_diff_rev'].abs()>1, 'ID_diff_rev'] = 9999
a['r_prev2'] = a['r'].shift(1)
a['r_next2'] = a['r'].shift(-1)

In [29]:
# GF0
a['gf0_t'] = -1 + \
                1 * ((a['t_prev']==a['t']) & (a['r_prev'].isnull())).astype(int) + \
                2 * ((a['t_prev']==a['t']) & (a['r_prev']==1)).astype(int)
        
# GF0
a['gf1_t'] = -1 + \
                1 * ((a['t_next']==a['t']) & (a['r_next'].isnull())).astype(int) + \
                2 * ((a['t_next']==a['t']) & (a['r_next']==1)).astype(int)
        
a['gf0_num'] = -1 + \
                1 * ((a['v_prev']==a['v']) & (a['r_prev'].isnull())).astype(int) + \
                2 * ((a['v_prev']==a['v']) & (a['r_prev']==1)).astype(int)
        
# GF0
a['gf1_num'] = -1 + \
                1 * ((a['v_next']==a['v']) & (a['r_next'].isnull())).astype(int) + \
                2 * ((a['v_next']==a['v']) & (a['r_next']==1)).astype(int)
        
a['gf0_num_t'] = -1 + \
                1 * ((a['t_prev']==a['t']) & (a['v_prev']==a['v']) & (a['r_prev'].isnull())).astype(int) + \
                2 * ((a['t_prev']==a['t']) & (a['v_prev']==a['v']) & (a['r_prev']==1)).astype(int)
        
# GF0
a['gf1_num_t'] = -1 + \
                1 * ((a['t_next']==a['t']) & (a['v_next']==a['v']) & (a['r_next'].isnull())).astype(int) + \
                2 * ((a['t_next']==a['t']) & (a['v_next']==a['v']) & (a['r_next']==1)).astype(int)

In [45]:
a[a['ID']==559839][['ID','r','r_prev2','r_next2','ID_diff']]


Out[45]:
ID r r_prev2 r_next2 ID_diff
263360 559839 0.0 1.0 NaN 1.0

In [51]:
(a[a['ID_diff']==1]['r_prev2'] - a[a['ID_diff']==1]['r']).value_counts()


Out[51]:
 0.0    30787
 1.0     1763
-1.0       98
dtype: int64

In [ ]:


In [60]:
res = a.groupby(['gf0_num', 'gf1_num','ID_diff','ID_diff_rev','r']).count().iloc[:,0].unstack('r')
res['ratio'] = res[1.0] / res[0.0] 
res


Out[60]:
r 0.0 1.0 ratio
gf0_num gf1_num ID_diff ID_diff_rev
-1 -1 1.0 -1.0 3994.0 34.0 0.008513
9999.0 22399.0 19.0 0.000848
9999.0 -1.0 22325.0 1487.0 0.066607
9999.0 997332.0 1432.0 0.001436
0 1.0 -1.0 4106.0 42.0 0.010229
9999.0 37.0 NaN NaN
9999.0 -1.0 21979.0 1736.0 0.078984
9999.0 1463.0 1.0 0.000684
1 1.0 -1.0 14.0 3.0 0.214286
9999.0 -1.0 61.0 208.0 3.409836
9999.0 1.0 NaN NaN
0 -1 1.0 -1.0 4122.0 134.0 0.032508
9999.0 23808.0 49.0 0.002058
9999.0 -1.0 35.0 2.0 0.057143
9999.0 1524.0 3.0 0.001969
0 1.0 -1.0 4083.0 139.0 0.034044
9999.0 42.0 NaN NaN
9999.0 -1.0 31.0 1.0 0.032258
9999.0 2.0 NaN NaN
1 1.0 -1.0 23.0 19.0 0.826087
9999.0 1.0 NaN NaN
1 -1 1.0 -1.0 74.0 106.0 1.432432
9999.0 1623.0 20.0 0.012323
9999.0 9999.0 3.0 NaN NaN
0 1.0 -1.0 64.0 104.0 1.625000
9999.0 2.0 NaN NaN
1 1.0 -1.0 NaN 7.0 NaN

In [58]:
res = a.groupby(['gf0_num', 'gf1_num','r']).count().iloc[:,0].unstack('r')
res['ratio'] = res[1.0] / res[0.0] 
res


Out[58]:
r 0.0 1.0 ratio
gf0_num gf1_num
-1 -1 1046051.0 2972.0 0.002841
0 27585.0 1779.0 0.064492
1 76.0 211.0 2.776316
0 -1 29489.0 188.0 0.006375
0 4158.0 140.0 0.033670
1 24.0 19.0 0.791667
1 -1 1700.0 126.0 0.074118
0 66.0 104.0 1.575758
1 NaN 7.0 NaN

In [53]:
a.head()


Out[53]:
ID r t v t_prev t_next r_prev r_next v_prev v_next gf0_t gf1_t gf0_num gf1_num gf0_num_t gf1_num_t ID_diff r_prev2 r_next2
191443 407384 0.0 0.44 -0.032 584.88 974.93 NaN 0.0 0.006 0.133 -1 -1 -1 -1 -1 -1 NaN NaN NaN
1338803 476673 NaN 0.44 0.002 550.23 570.96 NaN 0.0 -0.015 -0.137 -1 -1 -1 -1 -1 -1 69289.0 0.0 0.0
185672 395059 0.0 0.44 0.018 1064.61 1325.88 0.0 0.0 0.196 -0.021 -1 -1 -1 -1 -1 -1 -81614.0 NaN 0.0
186881 397647 0.0 0.44 0.036 575.34 1105.37 NaN 0.0 -0.075 0.095 -1 -1 -1 -1 -1 -1 2588.0 0.0 NaN
1411922 631848 NaN 0.44 0.056 777.54 460.43 0.0 0.0 -0.009 0.018 -1 -1 -1 -1 -1 -1 234201.0 0.0 0.0

In [155]:
a[(a['gf1_num']==1) & (a['gf1_t']==0)]


Out[155]:
ID r t v t_prev t_next r_prev r_next v_prev v_next gf0_t gf1_t gf0_num gf1_num gf0_num_t gf1_num_t

In [156]:
res = a.groupby(['gf0_t', 'gf1_t','r']).count().iloc[:,0].unstack('r')
res['ratio'] = res[1.0] / res[0.0] 
res


Out[156]:
r 0.0 1.0 ratio
gf0_t gf1_t
-1 -1 1052076.0 3356.0 0.003190
0 26203.0 2117.0 0.080792
1 75.0 296.0 3.946667
0 -1 28469.0 240.0 0.008430
0 4094.0 194.0 0.047386
1 23.0 20.0 0.869565
1 -1 2066.0 171.0 0.082769
0 77.0 145.0 1.883117
1 NaN 7.0 NaN

In [157]:
res = a.groupby(['gf0_num', 'gf1_num','r']).count().iloc[:,0].unstack('r')
res['ratio'] = res[1.0] / res[0.0] 
res


Out[157]:
r 0.0 1.0 ratio
gf0_num gf1_num
-1 -1 1035190.0 3312.0 0.003199
0 34361.0 2114.0 0.061523
1 113.0 295.0 2.610619
0 -1 36706.0 263.0 0.007165
0 4529.0 218.0 0.048134
1 26.0 21.0 0.807692
1 -1 2072.0 171.0 0.082529
0 86.0 145.0 1.686047
1 NaN 7.0 NaN

In [158]:
res = a.groupby(['gf0_num_t', 'gf1_num_t','r']).count().iloc[:,0].unstack('r')
res['ratio'] = res[1.0] / res[0.0] 
res


Out[158]:
r 0.0 1.0 ratio
gf0_num_t gf1_num_t
-1 -1 1052077.0 3356.0 0.003190
0 26203.0 2117.0 0.080792
1 75.0 296.0 3.946667
0 -1 28468.0 240.0 0.008431
0 4094.0 194.0 0.047386
1 23.0 20.0 0.869565
1 -1 2066.0 171.0 0.082769
0 77.0 145.0 1.883117
1 NaN 7.0 NaN

In [68]:
a[a['t']==836.9].groupby('v')['r'].agg(['count','sum','mean']).reset_index().sort_values('sum', ascending=False)


Out[68]:
v count sum mean
0 -0.212 14 8.0 0.571429
4 0.013 8 1.0 0.125000
1 -0.156 5 0.0 0.000000
2 -0.099 15 0.0 0.000000
3 -0.043 15 0.0 0.000000
5 0.069 16 0.0 0.000000
6 0.125 12 0.0 0.000000
7 0.181 10 0.0 0.000000
8 0.238 7 0.0 0.000000

In [180]:
def max_min(x):
    return x.max()-x.min()

def any_larger_than_n(x, n=4):
    return (x>n).sum()>0

In [181]:
c = a.groupby(['t','v'])['r'].agg(['sum','mean']).reset_index().groupby('t')['sum','mean'].agg({'sum':['sum', any_larger_than_n], 'mean': [max_min]})

In [194]:
c.loc[375.0]


Out[194]:
sum   sum                  25.000000
      any_larger_than_n     1.000000
mean  max_min               0.134524
Name: 375.0, dtype: float64

In [192]:
c.sort_values([('sum','sum')], ascending=False)


Out[192]:
sum mean
sum any_larger_than_n max_min
t
1374.0 37.0 1.0 0.036885
677.0 35.0 1.0 0.031646
374.0 28.0 1.0 0.023130
886.0 27.0 1.0 0.061720
375.0 25.0 1.0 0.134524
836.0 25.0 1.0 0.022693
838.0 25.0 1.0 0.020638
187.0 24.0 1.0 0.013255
825.0 24.0 1.0 0.018771
889.0 24.0 1.0 0.020906
819.0 23.0 1.0 0.028169
618.0 22.0 1.0 0.020619
204.0 22.0 1.0 0.019108
691.0 22.0 1.0 0.020151
371.0 22.0 1.0 0.013986
369.0 21.0 1.0 0.028571
259.0 21.0 1.0 0.026866
880.0 20.0 1.0 0.083333
352.0 20.0 1.0 0.028169
895.0 20.0 1.0 0.018116
1375.0 20.0 1.0 0.024450
619.0 20.0 1.0 0.033898
877.0 20.0 1.0 0.030718
186.0 19.0 1.0 0.014925
671.0 19.0 1.0 0.018904
325.0 18.0 1.0 0.022222
268.0 18.0 1.0 0.066667
1518.0 18.0 1.0 0.017612
1477.0 18.0 1.0 0.016506
609.0 18.0 1.0 0.017505
... ... ... ...
1492.0 0.0 0.0 0.000000
1499.0 0.0 0.0 0.000000
1502.0 0.0 0.0 0.000000
1508.0 0.0 0.0 0.000000
1510.0 0.0 0.0 0.000000
1516.0 0.0 0.0 0.000000
1407.0 0.0 0.0 0.000000
1378.0 0.0 0.0 0.000000
1158.0 0.0 0.0 0.000000
1298.0 0.0 0.0 0.000000
1161.0 0.0 0.0 0.000000
1164.0 0.0 0.0 0.000000
1165.0 0.0 0.0 0.000000
1221.0 0.0 0.0 0.000000
1241.0 0.0 0.0 0.000000
1243.0 0.0 0.0 0.000000
1244.0 0.0 0.0 0.000000
1258.0 0.0 0.0 0.000000
1300.0 0.0 0.0 0.000000
1366.0 0.0 0.0 0.000000
1307.0 0.0 0.0 0.000000
1315.0 0.0 0.0 0.000000
1325.0 0.0 0.0 0.000000
1333.0 0.0 0.0 0.000000
1358.0 0.0 0.0 0.000000
1359.0 0.0 0.0 0.000000
1360.0 0.0 0.0 0.000000
1362.0 0.0 0.0 0.000000
1714.0 0.0 0.0 0.000000
901.0 NaN 0.0 NaN

684 rows × 3 columns


In [182]:
c[(c['sum']['any_larger_than_n']>0) & (c['mean']['max_min']>0.2)]


Out[182]:
sum mean
sum any_larger_than_n max_min
t

In [195]:
a[a['t']==375.0].groupby('v')['r'].agg(['count','sum','mean']).reset_index().sort_values('sum', ascending=False)


Out[195]:
v count sum mean
1 -0.216 299 6.0 0.020067
3 -0.179 568 6.0 0.010563
4 -0.161 262 6.0 0.022901
2 -0.197 415 5.0 0.012048
0 -0.234 120 1.0 0.008333
5 -0.143 7 1.0 0.142857

In [36]:
a.groupby(['t','v'])['r'].agg(['count','sum','mean']).reset_index().sort_values('sum', ascending=False)


Out[36]:
t v count sum mean
68137 1376.7 -0.156 32 9.0 0.281250
33738 738.7 -0.088 27 8.0 0.296296
39786 836.9 -0.212 14 8.0 0.571429
68140 1376.7 0.013 21 6.0 0.285714
68139 1376.7 -0.043 28 6.0 0.214286
64478 1297.6 0.125 35 6.0 0.171429
33737 738.7 -0.144 19 6.0 0.315789
68143 1376.7 0.181 23 6.0 0.260870
75659 1527.6 0.069 25 5.0 0.200000
21622 499.2 -0.032 23 5.0 0.217391
52037 1075.9 0.013 20 5.0 0.250000
34173 743.2 -0.200 24 5.0 0.208333
17632 374.1 0.024 20 5.0 0.250000
43690 941.9 0.013 23 5.0 0.217391
17621 374.0 0.080 24 5.0 0.208333
40874 891.4 -0.099 35 5.0 0.142857
17623 374.0 0.193 27 5.0 0.185185
42710 922.9 0.069 17 5.0 0.294118
69855 1400.6 0.238 20 4.0 0.200000
64322 1295.2 0.181 27 4.0 0.148148
30692 684.1 -0.257 20 4.0 0.200000
76578 1549.5 0.013 26 4.0 0.153846
30699 684.1 0.137 16 4.0 0.250000
32327 711.5 -0.088 25 4.0 0.160000
40879 891.4 0.181 33 4.0 0.121212
76200 1542.2 -0.212 20 4.0 0.200000
67963 1375.2 0.125 17 4.0 0.235294
16316 347.9 -0.200 29 4.0 0.137931
68141 1376.7 0.069 32 4.0 0.125000
38003 804.9 -0.088 36 4.0 0.111111
... ... ... ... ... ...
82568 1674.5 0.350 0 NaN NaN
82611 1674.9 0.294 0 NaN NaN
82614 1686.5 0.013 0 NaN NaN
82666 1690.9 0.125 0 NaN NaN
82667 1690.9 0.181 0 NaN NaN
82679 1691.0 0.350 0 NaN NaN
82701 1691.2 0.350 0 NaN NaN
82769 1692.8 -0.043 0 NaN NaN
82771 1692.8 0.069 0 NaN NaN
82772 1692.8 0.125 0 NaN NaN
82805 1693.1 0.406 0 NaN NaN
82830 1693.3 0.406 0 NaN NaN
82853 1693.5 0.350 0 NaN NaN
82865 1698.0 0.406 0 NaN NaN
82876 1698.1 0.350 0 NaN NaN
82898 1698.3 0.350 0 NaN NaN
82920 1698.5 0.350 0 NaN NaN
82943 1698.7 0.350 0 NaN NaN
82954 1698.8 0.350 0 NaN NaN
83021 1709.5 0.350 0 NaN NaN
83022 1709.5 0.406 0 NaN NaN
83121 1710.4 0.350 0 NaN NaN
83131 1713.1 0.294 0 NaN NaN
83229 1714.0 0.350 0 NaN NaN
83264 1716.4 -0.043 0 NaN NaN
83269 1716.4 0.238 0 NaN NaN
83292 1716.6 0.350 0 NaN NaN
83314 1716.8 0.350 0 NaN NaN
83336 1717.0 0.294 0 NaN NaN
83356 1717.7 0.238 0 NaN NaN

83432 rows × 5 columns


In [ ]:
a.groupby(['t','v'])['r'].agg(['count','sum','mean']).reset_index().sort_values('sum', ascending=False)

In [21]:
a.groupby('r').mean()


Out[21]:
ID t v next_10_num_nan next_10_num_sum previous_10_num_nan previous_10_num_sum next_20_num_nan next_20_num_sum previous_20_num_nan previous_20_num_sum next_50_num_nan next_50_num_sum previous_50_num_nan previous_50_num_sum next_100_num_nan next_100_num_sum previous_100_num_nan previous_100_num_sum
r
0.0 1184946 850.422716 0.000007 4.993937 0.028442 4.993707 0.028676 9.997885 0.057390 9.997776 0.057498 24.997970 0.144020 24.997139 0.144873 49.996758 0.288558 49.995239 0.290212
1.0 1189117 765.193294 -0.011222 4.983792 0.140673 5.023089 0.140673 9.996028 0.227009 10.011305 0.227009 24.879927 0.485793 25.012832 0.485793 49.769478 0.882829 50.023220 0.882829

Create gf0 / gf1 features based on numeric, t, numeric + t


In [204]:
#for f_nr in lut['feature_nr'].unique():
shortlist = ["L3_S33_F3857",   "L3_S32_F3850", 
"L3_S33_F3859",  "L3_S29_F3354","L3_S30_F3754", "L3_S30_F3759",  "L3_S30_F3749", "L3_S33_F3865", "L3_S30_F3804", 
"L3_S29_F3348", 
"L3_S29_F3351", "L3_S29_F3345",   "L3_S30_F3554",  
"L3_S29_F3321", "L3_S30_F3494", "L3_S29_F3342",   "L3_S29_F3373", 
"L3_S29_F3324",   "L3_S30_F3769", "L3_S29_F3427",  "L3_S30_F3829"]

for f_name in shortlist:
    # Read local error rate file for specific features
    # Extract single column
    # Add column to dataframe
    f_nr = lut[lut['name_num']==f_name].feature_nr.values[0]
    output = calculate_lead_lag_numeric(f_nr)
    
    if isinstance(output, pd.DataFrame):
    
        output = output[['ID','r','t','v']]
        output.sort_values('ID', inplace=True)

        output['t_prev'] = output['t'].shift(1)
        output['t_next'] = output['t'].shift(-1)

        output['r_prev'] = output['r'].shift(1)
        output['r_next'] = output['r'].shift(-1)

        output['v_prev'] = output['v'].shift(1)
        output['v_next'] = output['v'].shift(-1)
        
        # GF0
        output['gf0_t'] = -1 + \
                        1 * ((output['t_prev']==output['t']) & (output['r_prev'].isnull())).astype(int) + \
                        2 * ((output['t_prev']==output['t']) & (output['r_prev']==1)).astype(int)

        # GF0
        output['gf1_t'] = -1 + \
                        1 * ((output['t_next']==output['t']) & (output['r_next'].isnull())).astype(int) + \
                        2 * ((output['t_next']==output['t']) & (output['r_next']==1)).astype(int)

        output['gf0_num'] = -1 + \
                        1 * ((output['v_prev']==output['v']) & (output['r_prev'].isnull())).astype(int) + \
                        2 * ((output['v_prev']==output['v']) & (output['r_prev']==1)).astype(int)

        # GF0
        output['gf1_num'] = -1 + \
                        1 * ((output['v_next']==output['v']) & (output['r_next'].isnull())).astype(int) + \
                        2 * ((output['v_next']==output['v']) & (output['r_next']==1)).astype(int)

        output['gf0_num_t'] = -1 + \
                        1 * ((output['t_prev']==output['t']) & (output['v_prev']==output['v']) & (output['r_prev'].isnull())).astype(int) + \
                        2 * ((output['t_prev']==output['t']) & (output['v_prev']==output['v']) & (output['r_prev']==1)).astype(int)

        # GF0
        output['gf1_num_t'] = -1 + \
                        1 * ((output['t_next']==output['t']) & (output['v_next']==output['v']) & (output['r_next'].isnull())).astype(int) + \
                        2 * ((output['t_next']==output['t']) & (output['v_next']==output['v']) & (output['r_next']==1)).astype(int)
                
                
        output.set_index('ID', inplace=True)

        output.filter(like='gf').to_csv(os.path.join(const.BASE_PATH, 'feat_set_num_gf_feat_nr_{}.csv'.format(f_nr)), index_label='ID')
        #break
    else:
        print('No numeric data for feature {}'.format(f_nr))


F nr: 3857 | Feature values: 2229189 | Time values: 2229189 | Nans found: 0
F nr: 3850 | Feature values: 48678 | Time values: 48678 | Nans found: 0
F nr: 3859 | Feature values: 2229189 | Time values: 2229189 | Nans found: 0
F nr: 3354 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
F nr: 3754 | Feature values: 2238895 | Time values: 2238895 | Nans found: 17
F nr: 3759 | Feature values: 2238895 | Time values: 2238895 | Nans found: 17
F nr: 3749 | Feature values: 2238895 | Time values: 2238895 | Nans found: 17
F nr: 3865 | Feature values: 2229189 | Time values: 2229189 | Nans found: 0
F nr: 3804 | Feature values: 2239366 | Time values: 2239366 | Nans found: 17
F nr: 3348 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
F nr: 3351 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
F nr: 3345 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
F nr: 3554 | Feature values: 2238895 | Time values: 2238895 | Nans found: 17
F nr: 3321 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
F nr: 3494 | Feature values: 2239349 | Time values: 2239349 | Nans found: 0
F nr: 3342 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
F nr: 3373 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
F nr: 3324 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
F nr: 3769 | Feature values: 2239366 | Time values: 2239366 | Nans found: 17
F nr: 3427 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
F nr: 3829 | Feature values: 2239366 | Time values: 2239366 | Nans found: 17

View feature results


In [26]:
#for f_nr in lut['feature_nr'].unique():
shortlist = ["L3_S33_F3857",   "L3_S32_F3850", 
"L3_S33_F3859",  "L3_S29_F3354","L3_S30_F3754", "L3_S30_F3759",  "L3_S30_F3749", "L3_S33_F3865", "L3_S30_F3804", 
"L3_S29_F3348", 
"L3_S29_F3351", "L3_S29_F3345",   "L3_S30_F3554",  
"L3_S29_F3321", "L3_S30_F3494", "L3_S29_F3342",   "L3_S29_F3373", 
"L3_S29_F3324",   "L3_S30_F3769", "L3_S29_F3427",  "L3_S30_F3829"]

for f_name in shortlist:
    # Read local error rate file for specific features
    # Extract single column
    # Add column to dataframe
    f_nr = lut[lut['name_num']==f_name].feature_nr.values[0]
    output = calculate_lead_lag_numeric(f_nr)
    
    if isinstance(output, pd.DataFrame):
    
        print(output.groupby('r').mean().filter(like='_sum'))

        print('Value counts next 10')
        print(output['next_10_num_sum'].value_counts())

        print('Value counts next 100')
        print(output['next_100_num_sum'].value_counts())

        #output.to_csv(os.path.join(const.BASE_PATH, 'feat_set_next_previous_num_feat_nr_{}.csv'.format(f_nr)))
        output[['ID','r','t','v']].to_csv(os.path.join(const.BASE_PATH, 'feat_set_next_previous_num_feat_nr_{}.csv'.format(f_nr)), index=False)
    else:
        print('No numeric data for feature {}'.format(f_nr))


F nr: 3857 | Feature values: 2229189 | Time values: 2229189 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.024032             0.024111         0.048166   
1.0         0.206467             0.206392         0.364292   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.048401         0.121569             0.121646   
1.0             0.364226         0.765236             0.765236   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.243593              0.244388  
1.0          1.234043              1.234043  
Value counts next 10
0.0    2176437
1.0      46419
2.0       3523
3.0        486
4.0        101
5.0         19
6.0          4
7.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1847760
1.0      278127
2.0       72019
3.0       18376
4.0        5816
5.0        2561
6.0        1332
7.0         983
8.0         668
9.0         333
10.0        210
11.0        164
14.0        127
12.0        110
15.0         93
13.0         83
21.0         67
16.0         61
20.0         52
22.0         40
18.0         39
19.0         37
17.0         35
23.0         29
24.0         29
25.0         23
26.0          8
27.0          1
Name: next_100_num_sum, dtype: int64
F nr: 3850 | Feature values: 48678 | Time values: 48678 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.206227             0.209951         0.422000   
1.0         0.614480             0.613924         1.104882   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.428785         1.081584             1.084656   
1.0             1.104882         2.304702             2.304702   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          2.214755              2.203405  
1.0          3.978300              3.978300  
Value counts next 10
0.0    40274
1.0     6235
2.0     1641
3.0      362
4.0       93
5.0       17
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     13425
1.0     11261
2.0      7926
3.0      5027
4.0      3557
5.0      2433
6.0      1565
7.0      1149
8.0       592
10.0      430
9.0       427
11.0      309
12.0      188
13.0      125
14.0       50
15.0       49
16.0       44
18.0       29
17.0       28
22.0       19
19.0       17
21.0       17
20.0        7
23.0        2
Name: next_100_num_sum, dtype: int64
F nr: 3859 | Feature values: 2229189 | Time values: 2229189 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.024079             0.024094         0.048461   
1.0         0.202634             0.202781         0.356473   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.048280         0.121526             0.121311   
1.0             0.356473         0.769023             0.769023   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.243676              0.244193  
1.0          1.236026              1.236026  
Value counts next 10
0.0    2176266
1.0      46596
2.0       3510
3.0        425
4.0        108
5.0         15
6.0          8
7.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1847582
1.0      277773
2.0       73030
3.0       18259
4.0        5529
5.0        2378
6.0        1558
7.0         865
8.0         655
9.0         248
10.0        235
11.0        227
12.0        138
14.0        123
13.0         84
16.0         66
21.0         63
22.0         63
17.0         59
15.0         57
23.0         47
20.0         46
19.0         34
18.0         28
24.0         19
25.0         11
26.0          8
Name: next_100_num_sum, dtype: int64
F nr: 3354 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028142             0.028430         0.056917   
1.0         0.190032             0.190003         0.336847   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056971         0.143177             0.143127   
1.0             0.336847         0.719829             0.719829   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287189              0.288194  
1.0          1.161473              1.161473  
Value counts next 10
0.0    2177306
1.0      54516
2.0       4438
3.0        549
4.0         81
5.0         13
6.0          3
7.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1793242
1.0      318349
2.0       88332
3.0       23475
4.0        7731
5.0        2958
6.0        1650
7.0         995
8.0         707
9.0         379
10.0        307
11.0        160
12.0        121
14.0        109
13.0        104
15.0         73
17.0         72
18.0         62
16.0         57
20.0         36
19.0         28
22.0         28
23.0         23
24.0         23
25.0         20
21.0         20
26.0          3
Name: next_100_num_sum, dtype: int64
F nr: 3754 | Feature values: 2238895 | Time values: 2238895 | Nans found: 17
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028344             0.028478         0.057081   
1.0         0.214548             0.214613         0.365440   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056937         0.143237             0.143015   
1.0             0.365440         0.747825             0.747825   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287572              0.287513  
1.0          1.198443              1.198443  
Value counts next 10
0.0    2177367
1.0      53921
2.0       4644
3.0        555
4.0        120
5.0         25
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1794022
1.0      318588
2.0       87156
3.0       22957
4.0        7780
5.0        3259
6.0        1656
7.0         981
8.0         691
9.0         411
10.0        267
11.0        244
12.0        137
14.0        126
13.0        103
15.0         80
16.0         80
17.0         65
19.0         57
21.0         52
18.0         49
20.0         44
22.0         33
27.0         16
25.0         15
26.0          9
24.0          6
23.0          5
28.0          5
Name: next_100_num_sum, dtype: int64
F nr: 3759 | Feature values: 2238895 | Time values: 2238895 | Nans found: 17
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0          0.02843             0.028475         0.056905   
1.0          0.20281             0.202779         0.366662   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.057018         0.143169             0.143241   
1.0             0.366662         0.747825             0.747825   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287490              0.287555  
1.0          1.198443              1.198443  
Value counts next 10
0.0    2177178
1.0      54367
2.0       4515
3.0        503
4.0        125
5.0         14
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1794210
1.0      318706
2.0       86211
3.0       23409
4.0        8129
5.0        3152
6.0        1667
7.0         921
8.0         677
9.0         464
10.0        266
11.0        238
12.0        120
14.0        113
15.0         98
13.0         71
21.0         70
18.0         68
16.0         63
19.0         59
17.0         54
20.0         54
22.0         21
27.0         20
26.0         11
23.0          9
25.0          9
24.0          4
Name: next_100_num_sum, dtype: int64
F nr: 3749 | Feature values: 2238895 | Time values: 2238895 | Nans found: 17
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028407             0.028314         0.057004   
1.0         0.201039             0.201039         0.351244   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.057030         0.143101             0.143181   
1.0             0.351244         0.741719             0.741719   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287279              0.287925  
1.0          1.197374              1.197374  
Value counts next 10
0.0    2177246
1.0      54227
2.0       4597
3.0        467
4.0        135
5.0         22
6.0          4
7.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1793983
1.0      318513
2.0       86692
3.0       23722
4.0        7729
5.0        3153
6.0        1608
7.0        1072
8.0         636
9.0         456
10.0        278
11.0        209
12.0        148
13.0        130
14.0         83
15.0         67
17.0         65
21.0         63
16.0         50
19.0         48
20.0         39
22.0         39
23.0         36
18.0         33
24.0         22
26.0         11
25.0          6
27.0          2
Name: next_100_num_sum, dtype: int64
F nr: 3865 | Feature values: 2229189 | Time values: 2229189 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.023898             0.024006         0.048015   
1.0         0.199314             0.199242         0.357555   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.048295         0.121501             0.121625   
1.0             0.357555         0.768662             0.768662   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.243481              0.244478  
1.0          1.232600              1.232600  
Value counts next 10
0.0    2176228
1.0      46560
2.0       3551
3.0        457
4.0         94
5.0          5
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1848047
1.0      277572
2.0       72077
3.0       18713
4.0        5885
5.0        2327
6.0        1409
7.0         991
8.0         598
9.0         328
10.0        227
14.0        138
11.0        136
12.0        105
13.0         99
16.0         83
15.0         82
21.0         75
20.0         51
18.0         49
22.0         48
17.0         45
23.0         43
19.0         28
24.0         18
25.0         12
Name: next_100_num_sum, dtype: int64
F nr: 3804 | Feature values: 2239366 | Time values: 2239366 | Nans found: 17
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028152             0.028311         0.056640   
1.0         0.201436             0.201651         0.360098   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.057064         0.142806             0.143313   
1.0             0.360098         0.744619             0.744619   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287068              0.288117  
1.0          1.192337              1.192337  
Value counts next 10
0.0    2177616
1.0      54145
2.0       4685
3.0        521
4.0         94
5.0          6
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1795750
1.0      316533
2.0       86777
3.0       24043
4.0        8105
5.0        2904
6.0        1935
7.0         910
8.0         745
9.0         384
10.0        263
11.0        219
16.0        122
12.0        105
14.0         80
21.0         76
13.0         74
20.0         59
15.0         55
19.0         55
18.0         46
22.0         39
17.0         37
24.0         21
23.0         19
25.0          9
Name: next_100_num_sum, dtype: int64
F nr: 3348 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028227             0.028292         0.056715   
1.0         0.190789             0.190819         0.340819   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056876         0.143336             0.143174   
1.0             0.340819         0.712343             0.712343   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287551              0.287794  
1.0          1.160403              1.160403  
Value counts next 10
0.0    2177249
1.0      54397
2.0       4611
3.0        480
4.0         81
5.0          9
6.0          3
7.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1793338
1.0      318725
2.0       87694
3.0       23417
4.0        7797
5.0        3120
6.0        1726
7.0         825
8.0         749
9.0         412
11.0        234
10.0        216
12.0        179
15.0        118
13.0        112
14.0         75
17.0         59
16.0         47
23.0         43
22.0         39
21.0         36
18.0         32
20.0         25
19.0         22
24.0         17
25.0          6
Name: next_100_num_sum, dtype: int64
F nr: 3351 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028377             0.028407         0.056947   
1.0         0.194461             0.194343         0.340972   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056945         0.143221             0.143148   
1.0             0.340972         0.706233             0.706233   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287759               0.28791  
1.0          1.156890               1.15689  
Value counts next 10
0.0    2177184
1.0      54633
2.0       4425
3.0        459
4.0        106
5.0         30
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1792818
1.0      319309
2.0       87839
3.0       23427
4.0        7507
5.0        3034
6.0        1858
7.0         910
8.0         743
9.0         355
10.0        295
12.0        166
11.0        164
14.0        100
17.0         95
13.0         92
15.0         91
18.0         61
16.0         48
23.0         41
19.0         36
20.0         29
22.0         20
21.0         12
24.0         11
25.0          3
Name: next_100_num_sum, dtype: int64
F nr: 3345 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028195             0.028272         0.056725   
1.0         0.189115             0.189144         0.339138   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056878         0.143440             0.143000   
1.0             0.339138         0.714177             0.714177   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287688              0.287584  
1.0          1.157806              1.157806  
Value counts next 10
0.0    2177244
1.0      54542
2.0       4482
3.0        489
4.0         94
5.0         17
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1793137
1.0      319314
2.0       87133
3.0       23605
4.0        7755
5.0        3199
6.0        1652
7.0         868
8.0         719
9.0         420
10.0        260
11.0        202
12.0        147
14.0        110
15.0         96
13.0         92
16.0         69
17.0         63
22.0         47
21.0         45
18.0         34
20.0         28
24.0         23
19.0         20
23.0         14
25.0          9
26.0          2
Name: next_100_num_sum, dtype: int64
F nr: 3554 | Feature values: 2238895 | Time values: 2238895 | Nans found: 17
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028529             0.028388         0.057167   
1.0         0.204827             0.204952         0.360556   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056839         0.143152             0.143124   
1.0             0.360556         0.747062             0.747062   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287152              0.287623  
1.0          1.207907              1.207907  
Value counts next 10
0.0    2177311
1.0      54269
2.0       4629
3.0        455
4.0         85
5.0         42
6.0          9
7.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1794478
1.0      318039
2.0       86573
3.0       23836
4.0        7648
5.0        3266
6.0        1546
7.0        1036
8.0         725
9.0         353
11.0        248
10.0        241
12.0        131
14.0        116
16.0        114
13.0         98
15.0         76
19.0         66
17.0         64
21.0         60
20.0         44
18.0         37
23.0         29
22.0         23
24.0         22
25.0         17
26.0          2
Name: next_100_num_sum, dtype: int64
F nr: 3321 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028262             0.028454         0.056934   
1.0         0.191808             0.191955         0.341277   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056796         0.143216             0.143423   
1.0             0.341277         0.710357             0.710357   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287199              0.288111  
1.0          1.154445              1.154445  
Value counts next 10
0.0    2177158
1.0      54731
2.0       4399
3.0        465
4.0         93
5.0         24
6.0          5
7.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1792006
1.0      320860
2.0       87088
3.0       23543
4.0        7558
5.0        2861
6.0        1907
7.0         970
8.0         612
9.0         429
12.0        204
10.0        186
11.0        169
14.0        111
13.0         95
15.0         95
17.0         87
16.0         65
18.0         64
19.0         32
22.0         31
23.0         27
20.0         26
21.0         25
24.0         11
25.0          3
Name: next_100_num_sum, dtype: int64
F nr: 3494 | Feature values: 2239349 | Time values: 2239349 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028357             0.028428         0.056953   
1.0         0.200795             0.200856         0.355518   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056941         0.143272             0.143179   
1.0             0.355518         0.740192             0.740192   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287308              0.287657  
1.0          1.199512              1.199512  
Value counts next 10
0.0    2177509
1.0      54387
2.0       4494
3.0        505
4.0        123
5.0         18
6.0          4
7.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1795193
1.0      317602
2.0       86784
3.0       23527
4.0        7830
5.0        3181
6.0        1711
7.0         968
8.0         839
9.0         346
10.0        291
11.0        210
12.0        187
14.0        110
13.0         85
17.0         70
15.0         64
21.0         61
19.0         52
16.0         46
18.0         45
22.0         43
20.0         41
23.0         21
25.0         15
24.0         11
26.0          5
27.0          3
Name: next_100_num_sum, dtype: int64
F nr: 3342 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028252             0.028507         0.056760   
1.0         0.185684             0.185769         0.333486   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.057052         0.143111             0.142984   
1.0             0.333486         0.710816             0.710816   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287520              0.287646  
1.0          1.158265              1.158265  
Value counts next 10
0.0    2177332
1.0      54536
2.0       4555
3.0        448
4.0         86
5.0         20
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1792365
1.0      320355
2.0       86796
3.0       23908
4.0        7643
5.0        3022
6.0        1664
7.0        1024
8.0         654
9.0         400
10.0        278
11.0        216
12.0        164
13.0         98
15.0         94
20.0         69
16.0         45
21.0         43
18.0         39
17.0         38
14.0         36
23.0         29
22.0         25
19.0         24
24.0         22
25.0         14
Name: next_100_num_sum, dtype: int64
F nr: 3373 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028293             0.028487         0.056936   
1.0         0.201223             0.201285         0.341277   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.057076         0.143062             0.143369   
1.0             0.341277         0.716621             0.716621   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287407              0.287924  
1.0          1.153223              1.153223  
Value counts next 10
0.0    2177345
1.0      54051
2.0       4774
3.0        446
4.0         98
5.0         21
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1793531
1.0      318500
2.0       87461
3.0       23907
4.0        7464
5.0        3103
6.0        1797
7.0         927
8.0         708
9.0         404
10.0        256
11.0        228
12.0        148
15.0        135
13.0        116
17.0         66
23.0         61
16.0         57
14.0         47
19.0         43
18.0         28
20.0         25
22.0         23
21.0         19
24.0         10
25.0          1
Name: next_100_num_sum, dtype: int64
F nr: 3324 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028319             0.028522         0.057017   
1.0         0.192296             0.192325         0.343110   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.057116         0.143108             0.143253   
1.0             0.343110         0.709288             0.709288   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287529              0.287928  
1.0          1.159792              1.159792  
Value counts next 10
0.0    2177317
1.0      54385
2.0       4535
3.0        505
4.0        101
5.0         12
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1792940
1.0      319191
2.0       87668
3.0       23933
4.0        7063
5.0        3126
6.0        1717
7.0        1135
8.0         585
9.0         434
10.0        268
12.0        208
11.0        173
15.0        138
13.0        105
14.0         70
17.0         62
18.0         48
16.0         38
22.0         38
20.0         30
21.0         26
23.0         25
19.0         16
24.0         13
25.0          7
26.0          6
Name: next_100_num_sum, dtype: int64
F nr: 3769 | Feature values: 2239366 | Time values: 2239366 | Nans found: 17
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028326             0.028332         0.056795   
1.0         0.203118             0.203056         0.357350   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056873         0.142759             0.143177   
1.0             0.357350         0.751030             0.751030   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287168              0.287761  
1.0          1.205160              1.205160  
Value counts next 10
0.0    2177796
1.0      54197
2.0       4608
3.0        523
4.0         94
5.0         24
6.0          3
7.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1794988
1.0      317547
2.0       87382
3.0       23448
4.0        7764
5.0        2864
6.0        1903
7.0         985
8.0         732
9.0         428
10.0        256
11.0        231
12.0        139
13.0        109
16.0        106
14.0         72
21.0         65
15.0         64
17.0         58
20.0         50
23.0         36
22.0         34
18.0         32
24.0         29
19.0         28
25.0         11
26.0          4
Name: next_100_num_sum, dtype: int64
F nr: 3427 | Feature values: 2239066 | Time values: 2239066 | Nans found: 0
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028292             0.028437         0.056978   
1.0         0.191561             0.191590         0.342346   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056951         0.143042             0.143095   
1.0             0.342346         0.710052             0.710052   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287351              0.288044  
1.0          1.152154              1.152154  
Value counts next 10
0.0    2177290
1.0      54582
2.0       4564
3.0        412
4.0         92
5.0         24
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1793135
1.0      319180
2.0       87257
3.0       23647
4.0        7683
5.0        3119
6.0        1753
7.0         941
8.0         705
9.0         358
10.0        248
11.0        199
12.0        195
13.0        111
14.0         94
15.0         91
16.0         70
17.0         62
18.0         56
19.0         44
20.0         37
21.0         30
22.0         21
23.0         19
24.0          9
Name: next_100_num_sum, dtype: int64
F nr: 3829 | Feature values: 2239366 | Time values: 2239366 | Nans found: 17
     next_10_num_sum  previous_10_num_sum  next_20_num_sum  \
r                                                            
0.0         0.028488             0.028309         0.057114   
1.0         0.204584             0.204803         0.355518   

     previous_20_num_sum  next_50_num_sum  previous_50_num_sum  \
r                                                                
0.0             0.056663         0.143450             0.142872   
1.0             0.355573         0.756678             0.756794   

     next_100_num_sum  previous_100_num_sum  
r                                            
0.0          0.287757              0.287441  
1.0          1.201801              1.201985  
Value counts next 10
0.0    2177636
1.0      54148
2.0       4639
3.0        502
4.0        117
5.0         15
7.0          2
6.0          2
Name: next_10_num_sum, dtype: int64
Value counts next 100
0.0     1794357
1.0      318920
2.0       86614
3.0       23767
4.0        7271
5.0        3295
6.0        1623
7.0         983
8.0         712
9.0         434
10.0        281
11.0        228
12.0        171
13.0         95
14.0         88
17.0         86
16.0         84
15.0         71
19.0         50
18.0         46
21.0         41
24.0         35
20.0         31
23.0         27
25.0         24
22.0         13
27.0          6
26.0          6
28.0          5
Name: next_100_num_sum, dtype: int64

In [28]:
a.head()


Out[28]:
ID r t v next_10_num_nan next_10_num_sum previous_10_num_nan previous_10_num_sum next_20_num_nan next_20_num_sum previous_20_num_nan previous_20_num_sum next_50_num_nan next_50_num_sum previous_50_num_nan previous_50_num_sum next_100_num_nan next_100_num_sum previous_100_num_nan previous_100_num_sum
1381888 555356 NaN 0.38 -0.200 6.0 0.0 NaN NaN 12.0 0.0 NaN NaN 25.0 1.0 NaN NaN 53.0 1.0 NaN NaN
1381889 555357 NaN 0.38 -0.200 5.0 0.0 NaN NaN 11.0 0.0 NaN NaN 24.0 1.0 NaN NaN 52.0 1.0 NaN NaN
2224302 2336275 NaN 0.38 -0.200 5.0 0.0 NaN NaN 11.0 0.0 NaN NaN 24.0 1.0 NaN NaN 51.0 1.0 NaN NaN
232262 492065 0.0 0.38 -0.144 6.0 0.0 NaN NaN 12.0 0.0 NaN NaN 25.0 1.0 NaN NaN 51.0 1.0 NaN NaN
194781 412433 0.0 0.38 -0.088 6.0 0.0 3.0 0.0 13.0 0.0 3.0 0.0 26.0 1.0 3.0 0.0 51.0 1.0 3.0 0.0

In [22]:
a.groupby('r').mean().filter(like='_sum')


Out[22]:
next_10_num_sum previous_10_num_sum next_20_num_sum previous_20_num_sum next_50_num_sum previous_50_num_sum next_100_num_sum previous_100_num_sum
r
0.0 0.028142 0.028430 0.056917 0.056971 0.143177 0.143127 0.287189 0.288194
1.0 0.190032 0.190003 0.336847 0.336847 0.719829 0.719829 1.161473 1.161473

In [26]:
print('Value counts next 10')
print(a['next_10_num_sum'].value_counts())


Value counts next 10
0.0    2177306
1.0      54516
2.0       4438
3.0        549
4.0         81
5.0         13
6.0          3
7.0          2
Name: next_10_num_sum, dtype: int64

In [27]:
print('Value counts next 100')
print(a['next_100_num_sum'].value_counts())


Value counts next 100
0.0     1793242
1.0      318349
2.0       88332
3.0       23475
4.0        7731
5.0        2958
6.0        1650
7.0         995
8.0         707
9.0         379
10.0        307
11.0        160
12.0        121
14.0        109
13.0        104
15.0         73
17.0         72
18.0         62
16.0         57
20.0         36
19.0         28
22.0         28
23.0         23
24.0         23
25.0         20
21.0         20
26.0          3
Name: next_100_num_sum, dtype: int64

Evaluate various features


In [41]:
cols = ['next_10_num_nan', 'next_10_num_sum', 'previous_10_num_nan', 'previous_10_num_sum',
 'next_20_num_nan', 'next_20_num_sum', 'previous_20_num_nan', 'previous_20_num_sum',
 'next_50_num_nan', 'next_50_num_sum', 'previous_50_num_nan', 'previous_50_num_sum',
 'next_100_num_nan', 'next_100_num_sum', 'previous_100_num_nan', 'previous_100_num_sum']

In [48]:
agg_0 = pd.DataFrame(index=lut['feature_nr'].unique(), columns = cols)
agg_1 = pd.DataFrame(index=lut['feature_nr'].unique(), columns = cols)

In [61]:
for f_nr in lut['feature_nr'].unique():
    print('Extracting {}'.format(f_nr))
    
    f_file = os.path.join(const.BASE_PATH, 
                          'feat_set_next_previous_num_feat_nr_{}.csv'.format(f_nr))
    
    if not os.path.exists(f_file):
        continue
                          
    output = pd.read_csv(f_file, index_col='ID')
    
    output = output.groupby('r')[cols].mean()
    
    agg_0.loc[f_nr] = output.loc[0.0]
    agg_1.loc[f_nr] = output.loc[1.0]


Extracting 0
Extracting 2
Extracting 4
Extracting 6
Extracting 8
Extracting 10
Extracting 12
Extracting 14
Extracting 16
Extracting 18
Extracting 20
Extracting 22
Extracting 24
Extracting 25
Extracting 27
Extracting 28
Extracting 29
Extracting 31
Extracting 32
Extracting 33
Extracting 35
Extracting 36
Extracting 37
Extracting 39
Extracting 40
Extracting 41
Extracting 43
Extracting 44
Extracting 45
Extracting 47
Extracting 48
Extracting 49
Extracting 51
Extracting 52
Extracting 53
Extracting 55
Extracting 56
Extracting 57
Extracting 59
Extracting 60
Extracting 61
Extracting 63
Extracting 64
Extracting 65
Extracting 67
Extracting 68
Extracting 69
Extracting 71
Extracting 72
Extracting 73
Extracting 75
Extracting 76
Extracting 77
Extracting 79
Extracting 80
Extracting 81
Extracting 83
Extracting 84
Extracting 85
Extracting 87
Extracting 88
Extracting 89
Extracting 91
Extracting 92
Extracting 93
Extracting 95
Extracting 96
Extracting 97
Extracting 99
Extracting 100
Extracting 101
Extracting 103
Extracting 104
Extracting 105
Extracting 107
Extracting 108
Extracting 109
Extracting 110
Extracting 112
Extracting 113
Extracting 114
Extracting 116
Extracting 118
Extracting 119
Extracting 121
Extracting 122
Extracting 123
Extracting 125
Extracting 126
Extracting 128
Extracting 129
Extracting 131
Extracting 132
Extracting 133
Extracting 135
Extracting 136
Extracting 138
Extracting 139
Extracting 140
Extracting 142
Extracting 144
Extracting 146
Extracting 147
Extracting 149
Extracting 151
Extracting 153
Extracting 154
Extracting 155
Extracting 156
Extracting 158
Extracting 159
Extracting 160
Extracting 161
Extracting 163
Extracting 164
Extracting 165
Extracting 166
Extracting 168
Extracting 169
Extracting 170
Extracting 171
Extracting 173
Extracting 174
Extracting 175
Extracting 176
Extracting 178
Extracting 179
Extracting 180
Extracting 181
Extracting 183
Extracting 184
Extracting 185
Extracting 186
Extracting 188
Extracting 189
Extracting 190
Extracting 191
Extracting 193
Extracting 194
Extracting 195
Extracting 196
Extracting 198
Extracting 199
Extracting 200
Extracting 201
Extracting 203
Extracting 204
Extracting 205
Extracting 206
Extracting 208
Extracting 209
Extracting 210
Extracting 211
Extracting 213
Extracting 214
Extracting 215
Extracting 217
Extracting 218
Extracting 219
Extracting 220
Extracting 222
Extracting 223
Extracting 224
Extracting 225
Extracting 227
Extracting 228
Extracting 229
Extracting 230
Extracting 232
Extracting 233
Extracting 234
Extracting 235
Extracting 237
Extracting 238
Extracting 239
Extracting 240
Extracting 242
Extracting 243
Extracting 244
Extracting 245
Extracting 247
Extracting 248
Extracting 249
Extracting 250
Extracting 252
Extracting 253
Extracting 254
Extracting 255
Extracting 257
Extracting 258
Extracting 259
Extracting 260
Extracting 262
Extracting 263
Extracting 264
Extracting 265
Extracting 267
Extracting 268
Extracting 269
Extracting 270
Extracting 272
Extracting 273
Extracting 274
Extracting 275
Extracting 277
Extracting 278
Extracting 279
Extracting 281
Extracting 282
Extracting 283
Extracting 285
Extracting 286
Extracting 287
Extracting 289
Extracting 290
Extracting 291
Extracting 293
Extracting 294
Extracting 295
Extracting 297
Extracting 298
Extracting 299
Extracting 301
Extracting 302
Extracting 303
Extracting 305
Extracting 306
Extracting 307
Extracting 309
Extracting 310
Extracting 311
Extracting 313
Extracting 314
Extracting 315
Extracting 317
Extracting 318
Extracting 319
Extracting 321
Extracting 322
Extracting 323
Extracting 325
Extracting 326
Extracting 327
Extracting 329
Extracting 330
Extracting 332
Extracting 334
Extracting 336
Extracting 338
Extracting 340
Extracting 342
Extracting 344
Extracting 346
Extracting 348
Extracting 350
Extracting 352
Extracting 354
Extracting 356
Extracting 358
Extracting 359
Extracting 361
Extracting 362
Extracting 363
Extracting 365
Extracting 366
Extracting 367
Extracting 369
Extracting 370
Extracting 371
Extracting 373
Extracting 374
Extracting 375
Extracting 377
Extracting 378
Extracting 379
Extracting 381
Extracting 382
Extracting 383
Extracting 385
Extracting 386
Extracting 387
Extracting 389
Extracting 390
Extracting 391
Extracting 393
Extracting 394
Extracting 396
Extracting 397
Extracting 399
Extracting 400
Extracting 402
Extracting 403
Extracting 405
Extracting 406
Extracting 408
Extracting 409
Extracting 411
Extracting 412
Extracting 414
Extracting 415
Extracting 417
Extracting 418
Extracting 420
Extracting 421
Extracting 422
Extracting 424
Extracting 425
Extracting 426
Extracting 427
Extracting 429
Extracting 430
Extracting 431
Extracting 433
Extracting 435
Extracting 436
Extracting 438
Extracting 439
Extracting 440
Extracting 442
Extracting 443
Extracting 445
Extracting 446
Extracting 448
Extracting 449
Extracting 450
Extracting 452
Extracting 453
Extracting 455
Extracting 456
Extracting 457
Extracting 459
Extracting 461
Extracting 463
Extracting 464
Extracting 466
Extracting 468
Extracting 470
Extracting 471
Extracting 472
Extracting 473
Extracting 475
Extracting 476
Extracting 477
Extracting 478
Extracting 480
Extracting 481
Extracting 482
Extracting 483
Extracting 485
Extracting 486
Extracting 487
Extracting 488
Extracting 490
Extracting 491
Extracting 492
Extracting 493
Extracting 495
Extracting 496
Extracting 497
Extracting 498
Extracting 500
Extracting 501
Extracting 502
Extracting 503
Extracting 505
Extracting 506
Extracting 507
Extracting 508
Extracting 510
Extracting 511
Extracting 512
Extracting 513
Extracting 515
Extracting 516
Extracting 517
Extracting 518
Extracting 520
Extracting 521
Extracting 522
Extracting 523
Extracting 525
Extracting 526
Extracting 527
Extracting 528
Extracting 530
Extracting 531
Extracting 532
Extracting 533
Extracting 535
Extracting 536
Extracting 537
Extracting 538
Extracting 540
Extracting 541
Extracting 542
Extracting 544
Extracting 545
Extracting 546
Extracting 547
Extracting 549
Extracting 550
Extracting 551
Extracting 552
Extracting 554
Extracting 555
Extracting 556
Extracting 557
Extracting 559
Extracting 560
Extracting 561
Extracting 562
Extracting 564
Extracting 565
Extracting 566
Extracting 567
Extracting 569
Extracting 570
Extracting 571
Extracting 572
Extracting 574
Extracting 575
Extracting 576
Extracting 577
Extracting 579
Extracting 580
Extracting 581
Extracting 582
Extracting 584
Extracting 585
Extracting 586
Extracting 587
Extracting 589
Extracting 590
Extracting 591
Extracting 592
Extracting 594
Extracting 595
Extracting 596
Extracting 597
Extracting 599
Extracting 600
Extracting 601
Extracting 602
Extracting 604
Extracting 605
Extracting 606
Extracting 607
Extracting 609
Extracting 610
Extracting 611
Extracting 612
Extracting 614
Extracting 615
Extracting 616
Extracting 618
Extracting 619
Extracting 620
Extracting 622
Extracting 623
Extracting 624
Extracting 626
Extracting 627
Extracting 628
Extracting 630
Extracting 631
Extracting 632
Extracting 634
Extracting 635
Extracting 636
Extracting 638
Extracting 639
Extracting 640
Extracting 642
Extracting 643
Extracting 644
Extracting 646
Extracting 647
Extracting 648
Extracting 650
Extracting 651
Extracting 652
Extracting 654
Extracting 655
Extracting 656
Extracting 658
Extracting 659
Extracting 660
Extracting 662
Extracting 663
Extracting 664
Extracting 666
Extracting 667
Extracting 668
Extracting 670
Extracting 671
Extracting 672
Extracting 674
Extracting 675
Extracting 676
Extracting 678
Extracting 679
Extracting 680
Extracting 682
Extracting 683
Extracting 684
Extracting 686
Extracting 687
Extracting 688
Extracting 690
Extracting 691
Extracting 692
Extracting 694
Extracting 695
Extracting 696
Extracting 698
Extracting 699
Extracting 700
Extracting 701
Extracting 703
Extracting 704
Extracting 705
Extracting 706
Extracting 708
Extracting 709
Extracting 710
Extracting 711
Extracting 713
Extracting 714
Extracting 715
Extracting 717
Extracting 718
Extracting 719
Extracting 720
Extracting 722
Extracting 723
Extracting 724
Extracting 726
Extracting 727
Extracting 728
Extracting 729
Extracting 731
Extracting 732
Extracting 733
Extracting 734
Extracting 736
Extracting 737
Extracting 738
Extracting 740
Extracting 741
Extracting 742
Extracting 744
Extracting 745
Extracting 746
Extracting 747
Extracting 749
Extracting 750
Extracting 751
Extracting 752
Extracting 754
Extracting 755
Extracting 756
Extracting 757
Extracting 759
Extracting 760
Extracting 761
Extracting 762
Extracting 764
Extracting 765
Extracting 766
Extracting 767
Extracting 769
Extracting 770
Extracting 771
Extracting 773
Extracting 774
Extracting 775
Extracting 776
Extracting 778
Extracting 779
Extracting 780
Extracting 781
Extracting 783
Extracting 784
Extracting 785
Extracting 786
Extracting 788
Extracting 789
Extracting 790
Extracting 791
Extracting 793
Extracting 794
Extracting 795
Extracting 796
Extracting 798
Extracting 799
Extracting 800
Extracting 802
Extracting 803
Extracting 805
Extracting 806
Extracting 808
Extracting 810
Extracting 812
Extracting 814
Extracting 816
Extracting 817
Extracting 819
Extracting 820
Extracting 821
Extracting 823
Extracting 824
Extracting 825
Extracting 827
Extracting 828
Extracting 829
Extracting 830
Extracting 832
Extracting 833
Extracting 834
Extracting 835
Extracting 837
Extracting 838
Extracting 839
Extracting 840
Extracting 842
Extracting 843
Extracting 844
Extracting 845
Extracting 847
Extracting 848
Extracting 849
Extracting 851
Extracting 852
Extracting 853
Extracting 855
Extracting 856
Extracting 857
Extracting 858
Extracting 860
Extracting 861
Extracting 862
Extracting 863
Extracting 865
Extracting 866
Extracting 867
Extracting 868
Extracting 870
Extracting 871
Extracting 872
Extracting 873
Extracting 875
Extracting 876
Extracting 877
Extracting 878
Extracting 880
Extracting 881
Extracting 882
Extracting 883
Extracting 885
Extracting 886
Extracting 887
Extracting 888
Extracting 890
Extracting 891
Extracting 892
Extracting 893
Extracting 895
Extracting 896
Extracting 897
Extracting 898
Extracting 900
Extracting 901
Extracting 902
Extracting 903
Extracting 905
Extracting 906
Extracting 907
Extracting 908
Extracting 910
Extracting 911
Extracting 912
Extracting 914
Extracting 915
Extracting 916
Extracting 918
Extracting 919
Extracting 920
Extracting 921
Extracting 923
Extracting 924
Extracting 925
Extracting 926
Extracting 928
Extracting 929
Extracting 930
Extracting 931
Extracting 933
Extracting 934
Extracting 935
Extracting 936
Extracting 938
Extracting 939
Extracting 940
Extracting 942
Extracting 943
Extracting 944
Extracting 946
Extracting 947
Extracting 948
Extracting 949
Extracting 951
Extracting 952
Extracting 953
Extracting 954
Extracting 956
Extracting 957
Extracting 958
Extracting 959
Extracting 961
Extracting 962
Extracting 963
Extracting 964
Extracting 966
Extracting 967
Extracting 968
Extracting 969
Extracting 971
Extracting 972
Extracting 973
Extracting 974
Extracting 976
Extracting 977
Extracting 978
Extracting 979
Extracting 981
Extracting 982
Extracting 983
Extracting 984
Extracting 986
Extracting 987
Extracting 988
Extracting 989
Extracting 991
Extracting 992
Extracting 993
Extracting 994
Extracting 996
Extracting 997
Extracting 998
Extracting 1000
Extracting 1002
Extracting 1004
Extracting 1006
Extracting 1008
Extracting 1010
Extracting 1012
Extracting 1014
Extracting 1016
Extracting 1017
Extracting 1019
Extracting 1020
Extracting 1021
Extracting 1022
Extracting 1024
Extracting 1025
Extracting 1026
Extracting 1027
Extracting 1029
Extracting 1030
Extracting 1031
Extracting 1032
Extracting 1034
Extracting 1035
Extracting 1036
Extracting 1037
Extracting 1039
Extracting 1040
Extracting 1041
Extracting 1042
Extracting 1044
Extracting 1045
Extracting 1046
Extracting 1047
Extracting 1049
Extracting 1050
Extracting 1051
Extracting 1052
Extracting 1054
Extracting 1055
Extracting 1056
Extracting 1057
Extracting 1059
Extracting 1060
Extracting 1061
Extracting 1063
Extracting 1064
Extracting 1065
Extracting 1067
Extracting 1068
Extracting 1069
Extracting 1071
Extracting 1072
Extracting 1073
Extracting 1075
Extracting 1076
Extracting 1078
Extracting 1079
Extracting 1080
Extracting 1082
Extracting 1083
Extracting 1084
Extracting 1086
Extracting 1087
Extracting 1088
Extracting 1090
Extracting 1091
Extracting 1093
Extracting 1094
Extracting 1095
Extracting 1097
Extracting 1098
Extracting 1099
Extracting 1101
Extracting 1102
Extracting 1103
Extracting 1105
Extracting 1106
Extracting 1107
Extracting 1109
Extracting 1110
Extracting 1111
Extracting 1113
Extracting 1114
Extracting 1115
Extracting 1117
Extracting 1118
Extracting 1119
Extracting 1121
Extracting 1122
Extracting 1123
Extracting 1125
Extracting 1126
Extracting 1127
Extracting 1129
Extracting 1130
Extracting 1131
Extracting 1133
Extracting 1134
Extracting 1136
Extracting 1137
Extracting 1139
Extracting 1140
Extracting 1142
Extracting 1144
Extracting 1145
Extracting 1147
Extracting 1148
Extracting 1150
Extracting 1152
Extracting 1154
Extracting 1156
Extracting 1157
Extracting 1159
Extracting 1160
Extracting 1161
Extracting 1162
Extracting 1164
Extracting 1165
Extracting 1166
Extracting 1167
Extracting 1169
Extracting 1170
Extracting 1172
Extracting 1174
Extracting 1176
Extracting 1177
Extracting 1179
Extracting 1180
Extracting 1181
Extracting 1183
Extracting 1184
Extracting 1185
Extracting 1187
Extracting 1188
Extracting 1189
Extracting 1191
Extracting 1192
Extracting 1193
Extracting 1195
Extracting 1196
Extracting 1197
Extracting 1198
Extracting 1200
Extracting 1201
Extracting 1202
Extracting 1203
Extracting 1205
Extracting 1206
Extracting 1207
Extracting 1208
Extracting 1210
Extracting 1211
Extracting 1212
Extracting 1213
Extracting 1215
Extracting 1216
Extracting 1217
Extracting 1219
Extracting 1220
Extracting 1221
Extracting 1223
Extracting 1224
Extracting 1225
Extracting 1226
Extracting 1228
Extracting 1229
Extracting 1230
Extracting 1231
Extracting 1233
Extracting 1234
Extracting 1235
Extracting 1236
Extracting 1238
Extracting 1239
Extracting 1240
Extracting 1241
Extracting 1243
Extracting 1244
Extracting 1245
Extracting 1246
Extracting 1248
Extracting 1249
Extracting 1250
Extracting 1251
Extracting 1253
Extracting 1254
Extracting 1255
Extracting 1256
Extracting 1258
Extracting 1259
Extracting 1260
Extracting 1261
Extracting 1263
Extracting 1264
Extracting 1265
Extracting 1266
Extracting 1268
Extracting 1269
Extracting 1270
Extracting 1271
Extracting 1273
Extracting 1274
Extracting 1275
Extracting 1276
Extracting 1278
Extracting 1279
Extracting 1280
Extracting 1282
Extracting 1283
Extracting 1284
Extracting 1286
Extracting 1287
Extracting 1288
Extracting 1289
Extracting 1291
Extracting 1292
Extracting 1293
Extracting 1294
Extracting 1296
Extracting 1297
Extracting 1298
Extracting 1299
Extracting 1301
Extracting 1302
Extracting 1303
Extracting 1304
Extracting 1306
Extracting 1307
Extracting 1308
Extracting 1310
Extracting 1311
Extracting 1312
Extracting 1314
Extracting 1315
Extracting 1316
Extracting 1317
Extracting 1319
Extracting 1320
Extracting 1321
Extracting 1322
Extracting 1324
Extracting 1325
Extracting 1326
Extracting 1327
Extracting 1329
Extracting 1330
Extracting 1331
Extracting 1332
Extracting 1334
Extracting 1335
Extracting 1336
Extracting 1337
Extracting 1339
Extracting 1340
Extracting 1341
Extracting 1342
Extracting 1344
Extracting 1345
Extracting 1346
Extracting 1347
Extracting 1349
Extracting 1350
Extracting 1351
Extracting 1352
Extracting 1354
Extracting 1355
Extracting 1356
Extracting 1357
Extracting 1359
Extracting 1360
Extracting 1361
Extracting 1362
Extracting 1364
Extracting 1365
Extracting 1366
Extracting 1367
Extracting 1369
Extracting 1370
Extracting 1371
Extracting 1372
Extracting 1374
Extracting 1375
Extracting 1376
Extracting 1377
Extracting 1379
Extracting 1380
Extracting 1381
Extracting 1382
Extracting 1384
Extracting 1385
Extracting 1386
Extracting 1387
Extracting 1389
Extracting 1390
Extracting 1391
Extracting 1392
Extracting 1394
Extracting 1395
Extracting 1396
Extracting 1397
Extracting 1399
Extracting 1400
Extracting 1401
Extracting 1402
Extracting 1404
Extracting 1405
Extracting 1406
Extracting 1407
Extracting 1409
Extracting 1410
Extracting 1411
Extracting 1412
Extracting 1414
Extracting 1415
Extracting 1416
Extracting 1417
Extracting 1419
Extracting 1420
Extracting 1421
Extracting 1422
Extracting 1424
Extracting 1425
Extracting 1426
Extracting 1427
Extracting 1429
Extracting 1430
Extracting 1431
Extracting 1432
Extracting 1434
Extracting 1435
Extracting 1436
Extracting 1437
Extracting 1439
Extracting 1440
Extracting 1441
Extracting 1442
Extracting 1444
Extracting 1445
Extracting 1446
Extracting 1447
Extracting 1449
Extracting 1450
Extracting 1451
Extracting 1452
Extracting 1454
Extracting 1455
Extracting 1456
Extracting 1458
Extracting 1459
Extracting 1460
Extracting 1462
Extracting 1463
Extracting 1464
Extracting 1466
Extracting 1467
Extracting 1468
Extracting 1470
Extracting 1471
Extracting 1473
Extracting 1474
Extracting 1475
Extracting 1477
Extracting 1478
Extracting 1479
Extracting 1481
Extracting 1482
Extracting 1483
Extracting 1485
Extracting 1486
Extracting 1487
Extracting 1489
Extracting 1490
Extracting 1491
Extracting 1493
Extracting 1494
Extracting 1495
Extracting 1497
Extracting 1498
Extracting 1499
Extracting 1501
Extracting 1502
Extracting 1503
Extracting 1505
Extracting 1506
Extracting 1507
Extracting 1509
Extracting 1510
Extracting 1512
Extracting 1514
Extracting 1516
Extracting 1518
Extracting 1520
Extracting 1521
Extracting 1523
Extracting 1524
Extracting 1525
Extracting 1526
Extracting 1528
Extracting 1529
Extracting 1530
Extracting 1531
Extracting 1533
Extracting 1534
Extracting 1535
Extracting 1537
Extracting 1538
Extracting 1539
Extracting 1540
Extracting 1542
Extracting 1543
Extracting 1544
Extracting 1545
Extracting 1547
Extracting 1548
Extracting 1549
Extracting 1551
Extracting 1552
Extracting 1553
Extracting 1555
Extracting 1556
Extracting 1557
Extracting 1559
Extracting 1560
Extracting 1561
Extracting 1563
Extracting 1564
Extracting 1565
Extracting 1567
Extracting 1569
Extracting 1571
Extracting 1573
Extracting 1575
Extracting 1577
Extracting 1578
Extracting 1580
Extracting 1581
Extracting 1582
Extracting 1584
Extracting 1585
Extracting 1586
Extracting 1588
Extracting 1589
Extracting 1590
Extracting 1592
Extracting 1593
Extracting 1594
Extracting 1595
Extracting 1597
Extracting 1598
Extracting 1599
Extracting 1600
Extracting 1602
Extracting 1603
Extracting 1604
Extracting 1605
Extracting 1607
Extracting 1608
Extracting 1609
Extracting 1610
Extracting 1612
Extracting 1613
Extracting 1614
Extracting 1616
Extracting 1617
Extracting 1618
Extracting 1620
Extracting 1621
Extracting 1622
Extracting 1623
Extracting 1625
Extracting 1626
Extracting 1627
Extracting 1628
Extracting 1630
Extracting 1631
Extracting 1632
Extracting 1633
Extracting 1635
Extracting 1636
Extracting 1637
Extracting 1638
Extracting 1640
Extracting 1641
Extracting 1642
Extracting 1643
Extracting 1645
Extracting 1646
Extracting 1647
Extracting 1648
Extracting 1650
Extracting 1651
Extracting 1652
Extracting 1653
Extracting 1655
Extracting 1656
Extracting 1657
Extracting 1658
Extracting 1660
Extracting 1661
Extracting 1662
Extracting 1663
Extracting 1665
Extracting 1666
Extracting 1667
Extracting 1668
Extracting 1670
Extracting 1671
Extracting 1672
Extracting 1673
Extracting 1675
Extracting 1676
Extracting 1677
Extracting 1679
Extracting 1680
Extracting 1681
Extracting 1683
Extracting 1684
Extracting 1685
Extracting 1686
Extracting 1688
Extracting 1689
Extracting 1690
Extracting 1691
Extracting 1693
Extracting 1694
Extracting 1695
Extracting 1696
Extracting 1698
Extracting 1699
Extracting 1700
Extracting 1701
Extracting 1703
Extracting 1704
Extracting 1705
Extracting 1707
Extracting 1708
Extracting 1709
Extracting 1711
Extracting 1712
Extracting 1713
Extracting 1714
Extracting 1716
Extracting 1717
Extracting 1718
Extracting 1719
Extracting 1721
Extracting 1722
Extracting 1723
Extracting 1724
Extracting 1726
Extracting 1727
Extracting 1728
Extracting 1729
Extracting 1731
Extracting 1732
Extracting 1733
Extracting 1734
Extracting 1736
Extracting 1737
Extracting 1738
Extracting 1739
Extracting 1741
Extracting 1742
Extracting 1743
Extracting 1744
Extracting 1746
Extracting 1747
Extracting 1748
Extracting 1749
Extracting 1751
Extracting 1752
Extracting 1753
Extracting 1754
Extracting 1756
Extracting 1757
Extracting 1758
Extracting 1759
Extracting 1761
Extracting 1762
Extracting 1763
Extracting 1764
Extracting 1766
Extracting 1767
Extracting 1768
Extracting 1769
Extracting 1771
Extracting 1772
Extracting 1773
Extracting 1774
Extracting 1776
Extracting 1777
Extracting 1778
Extracting 1779
Extracting 1781
Extracting 1782
Extracting 1783
Extracting 1784
Extracting 1786
Extracting 1787
Extracting 1788
Extracting 1789
Extracting 1791
Extracting 1792
Extracting 1793
Extracting 1794
Extracting 1796
Extracting 1797
Extracting 1798
Extracting 1799
Extracting 1801
Extracting 1802
Extracting 1803
Extracting 1804
Extracting 1806
Extracting 1807
Extracting 1808
Extracting 1810
Extracting 1812
Extracting 1814
Extracting 1816
Extracting 1818
Extracting 1820
Extracting 1822
Extracting 1824
Extracting 1825
Extracting 1827
Extracting 1829
Extracting 1831
Extracting 1832
Extracting 1834
Extracting 1836
Extracting 1838
Extracting 1840
Extracting 1842
Extracting 1844
Extracting 1846
Extracting 1848
Extracting 1850
Extracting 1852
Extracting 1853
Extracting 1855
Extracting 1856
Extracting 1858
Extracting 1859
Extracting 1861
Extracting 1863
Extracting 1865
Extracting 1866
Extracting 1868
Extracting 1869
Extracting 1870
Extracting 1872
Extracting 1873
Extracting 1874
Extracting 1876
Extracting 1877
Extracting 1878
Extracting 1880
Extracting 1881
Extracting 1882
Extracting 1884
Extracting 1885
Extracting 1886
Extracting 1888
Extracting 1889
Extracting 1890
Extracting 1892
Extracting 1894
Extracting 1896
Extracting 1897
Extracting 1899
Extracting 1900
Extracting 1901
Extracting 1903
Extracting 1904
Extracting 1905
Extracting 1907
Extracting 1908
Extracting 1909
Extracting 1910
Extracting 1912
Extracting 1913
Extracting 1914
Extracting 1915
Extracting 1917
Extracting 1918
Extracting 1919
Extracting 1920
Extracting 1922
Extracting 1923
Extracting 1924
Extracting 1925
Extracting 1927
Extracting 1928
Extracting 1929
Extracting 1930
Extracting 1932
Extracting 1933
Extracting 1934
Extracting 1936
Extracting 1937
Extracting 1938
Extracting 1939
Extracting 1941
Extracting 1942
Extracting 1943
Extracting 1944
Extracting 1946
Extracting 1947
Extracting 1948
Extracting 1949
Extracting 1951
Extracting 1952
Extracting 1953
Extracting 1954
Extracting 1956
Extracting 1957
Extracting 1958
Extracting 1959
Extracting 1961
Extracting 1962
Extracting 1963
Extracting 1964
Extracting 1966
Extracting 1967
Extracting 1968
Extracting 1969
Extracting 1971
Extracting 1972
Extracting 1973
Extracting 1974
Extracting 1976
Extracting 1977
Extracting 1978
Extracting 1979
Extracting 1981
Extracting 1982
Extracting 1983
Extracting 1985
Extracting 1986
Extracting 1987
Extracting 1988
Extracting 1990
Extracting 1991
Extracting 1992
Extracting 1993
Extracting 1995
Extracting 1996
Extracting 1997
Extracting 1998
Extracting 2000
Extracting 2001
Extracting 2002
Extracting 2003
Extracting 2005
Extracting 2006
Extracting 2007
Extracting 2008
Extracting 2010
Extracting 2011
Extracting 2012
Extracting 2014
Extracting 2015
Extracting 2016
Extracting 2017
Extracting 2019
Extracting 2020
Extracting 2021
Extracting 2022
Extracting 2024
Extracting 2025
Extracting 2026
Extracting 2027
Extracting 2029
Extracting 2030
Extracting 2031
Extracting 2032
Extracting 2034
Extracting 2035
Extracting 2036
Extracting 2037
Extracting 2039
Extracting 2040
Extracting 2041
Extracting 2042
Extracting 2044
Extracting 2045
Extracting 2046
Extracting 2047
Extracting 2049
Extracting 2050
Extracting 2051
Extracting 2052
Extracting 2054
Extracting 2055
Extracting 2056
Extracting 2057
Extracting 2059
Extracting 2060
Extracting 2061
Extracting 2062
Extracting 2064
Extracting 2065
Extracting 2066
Extracting 2067
Extracting 2069
Extracting 2070
Extracting 2071
Extracting 2072
Extracting 2074
Extracting 2075
Extracting 2076
Extracting 2077
Extracting 2079
Extracting 2080
Extracting 2081
Extracting 2082
Extracting 2084
Extracting 2085
Extracting 2086
Extracting 2087
Extracting 2089
Extracting 2090
Extracting 2091
Extracting 2092
Extracting 2094
Extracting 2095
Extracting 2096
Extracting 2097
Extracting 2099
Extracting 2100
Extracting 2101
Extracting 2102
Extracting 2104
Extracting 2105
Extracting 2106
Extracting 2107
Extracting 2109
Extracting 2110
Extracting 2111
Extracting 2112
Extracting 2114
Extracting 2115
Extracting 2116
Extracting 2117
Extracting 2119
Extracting 2120
Extracting 2121
Extracting 2122
Extracting 2124
Extracting 2125
Extracting 2126
Extracting 2127
Extracting 2129
Extracting 2130
Extracting 2131
Extracting 2132
Extracting 2134
Extracting 2135
Extracting 2136
Extracting 2137
Extracting 2139
Extracting 2141
Extracting 2142
Extracting 2144
Extracting 2145
Extracting 2147
Extracting 2148
Extracting 2150
Extracting 2152
Extracting 2153
Extracting 2155
Extracting 2156
Extracting 2158
Extracting 2159
Extracting 2161
Extracting 2162
Extracting 2164
Extracting 2165
Extracting 2167
Extracting 2168
Extracting 2170
Extracting 2171
Extracting 2173
Extracting 2174
Extracting 2176
Extracting 2177
Extracting 2179
Extracting 2181
Extracting 2182
Extracting 2184
Extracting 2185
Extracting 2187
Extracting 2188
Extracting 2190
Extracting 2191
Extracting 2193
Extracting 2194
Extracting 2196
Extracting 2197
Extracting 2199
Extracting 2200
Extracting 2202
Extracting 2203
Extracting 2205
Extracting 2207
Extracting 2208
Extracting 2210
Extracting 2211
Extracting 2213
Extracting 2215
Extracting 2217
Extracting 2218
Extracting 2220
Extracting 2221
Extracting 2223
Extracting 2224
Extracting 2226
Extracting 2227
Extracting 2229
Extracting 2231
Extracting 2233
Extracting 2234
Extracting 2235
Extracting 2237
Extracting 2239
Extracting 2241
Extracting 2243
Extracting 2245
Extracting 2247
Extracting 2249
Extracting 2250
Extracting 2252
Extracting 2253
Extracting 2254
Extracting 2256
Extracting 2257
Extracting 2258
Extracting 2259
Extracting 2261
Extracting 2262
Extracting 2263
Extracting 2264
Extracting 2266
Extracting 2267
Extracting 2268
Extracting 2269
Extracting 2271
Extracting 2272
Extracting 2273
Extracting 2274
Extracting 2276
Extracting 2277
Extracting 2278
Extracting 2279
Extracting 2281
Extracting 2282
Extracting 2283
Extracting 2285
Extracting 2286
Extracting 2287
Extracting 2288
Extracting 2290
Extracting 2291
Extracting 2292
Extracting 2293
Extracting 2295
Extracting 2296
Extracting 2297
Extracting 2298
Extracting 2300
Extracting 2301
Extracting 2302
Extracting 2303
Extracting 2305
Extracting 2306
Extracting 2307
Extracting 2308
Extracting 2310
Extracting 2311
Extracting 2312
Extracting 2313
Extracting 2315
Extracting 2316
Extracting 2317
Extracting 2318
Extracting 2320
Extracting 2321
Extracting 2322
Extracting 2323
Extracting 2325
Extracting 2326
Extracting 2327
Extracting 2328
Extracting 2330
Extracting 2331
Extracting 2332
Extracting 2334
Extracting 2335
Extracting 2336
Extracting 2337
Extracting 2339
Extracting 2340
Extracting 2341
Extracting 2342
Extracting 2344
Extracting 2345
Extracting 2346
Extracting 2347
Extracting 2349
Extracting 2350
Extracting 2351
Extracting 2352
Extracting 2354
Extracting 2355
Extracting 2356
Extracting 2357
Extracting 2359
Extracting 2360
Extracting 2361
Extracting 2363
Extracting 2364
Extracting 2365
Extracting 2366
Extracting 2368
Extracting 2369
Extracting 2370
Extracting 2371
Extracting 2373
Extracting 2374
Extracting 2375
Extracting 2376
Extracting 2378
Extracting 2379
Extracting 2380
Extracting 2381
Extracting 2383
Extracting 2384
Extracting 2385
Extracting 2386
Extracting 2388
Extracting 2389
Extracting 2390
Extracting 2391
Extracting 2393
Extracting 2394
Extracting 2395
Extracting 2396
Extracting 2398
Extracting 2399
Extracting 2400
Extracting 2401
Extracting 2403
Extracting 2404
Extracting 2405
Extracting 2407
Extracting 2408
Extracting 2410
Extracting 2411
Extracting 2413
Extracting 2414
Extracting 2416
Extracting 2417
Extracting 2419
Extracting 2420
Extracting 2422
Extracting 2423
Extracting 2425
Extracting 2426
Extracting 2428
Extracting 2429
Extracting 2431
Extracting 2433
Extracting 2435
Extracting 2437
Extracting 2439
Extracting 2441
Extracting 2443
Extracting 2444
Extracting 2445
Extracting 2447
Extracting 2449
Extracting 2451
Extracting 2452
Extracting 2454
Extracting 2456
Extracting 2458
Extracting 2460
Extracting 2462
Extracting 2464
Extracting 2466
Extracting 2468
Extracting 2470
Extracting 2472
Extracting 2473
Extracting 2475
Extracting 2476
Extracting 2478
Extracting 2479
Extracting 2481
Extracting 2482
Extracting 2484
Extracting 2485
Extracting 2487
Extracting 2488
Extracting 2490
Extracting 2491
Extracting 2493
Extracting 2494
Extracting 2496
Extracting 2498
Extracting 2500
Extracting 2501
Extracting 2502
Extracting 2504
Extracting 2506
Extracting 2508
Extracting 2510
Extracting 2512
Extracting 2514
Extracting 2516
Extracting 2517
Extracting 2519
Extracting 2520
Extracting 2521
Extracting 2523
Extracting 2524
Extracting 2525
Extracting 2526
Extracting 2528
Extracting 2529
Extracting 2530
Extracting 2531
Extracting 2533
Extracting 2534
Extracting 2535
Extracting 2536
Extracting 2538
Extracting 2539
Extracting 2540
Extracting 2541
Extracting 2543
Extracting 2544
Extracting 2545
Extracting 2546
Extracting 2548
Extracting 2549
Extracting 2550
Extracting 2552
Extracting 2553
Extracting 2554
Extracting 2555
Extracting 2557
Extracting 2558
Extracting 2559
Extracting 2560
Extracting 2562
Extracting 2563
Extracting 2564
Extracting 2565
Extracting 2567
Extracting 2568
Extracting 2569
Extracting 2570
Extracting 2572
Extracting 2573
Extracting 2574
Extracting 2575
Extracting 2577
Extracting 2578
Extracting 2579
Extracting 2580
Extracting 2582
Extracting 2583
Extracting 2584
Extracting 2585
Extracting 2587
Extracting 2588
Extracting 2589
Extracting 2590
Extracting 2592
Extracting 2593
Extracting 2594
Extracting 2595
Extracting 2597
Extracting 2598
Extracting 2599
Extracting 2601
Extracting 2602
Extracting 2603
Extracting 2604
Extracting 2606
Extracting 2607
Extracting 2608
Extracting 2609
Extracting 2611
Extracting 2612
Extracting 2613
Extracting 2614
Extracting 2616
Extracting 2617
Extracting 2618
Extracting 2619
Extracting 2621
Extracting 2622
Extracting 2623
Extracting 2624
Extracting 2626
Extracting 2627
Extracting 2628
Extracting 2630
Extracting 2631
Extracting 2632
Extracting 2633
Extracting 2635
Extracting 2636
Extracting 2637
Extracting 2638
Extracting 2640
Extracting 2641
Extracting 2642
Extracting 2643
Extracting 2645
Extracting 2646
Extracting 2647
Extracting 2648
Extracting 2650
Extracting 2651
Extracting 2652
Extracting 2653
Extracting 2655
Extracting 2656
Extracting 2657
Extracting 2658
Extracting 2660
Extracting 2661
Extracting 2662
Extracting 2663
Extracting 2665
Extracting 2666
Extracting 2667
Extracting 2668
Extracting 2670
Extracting 2671
Extracting 2672
Extracting 2673
Extracting 2675
Extracting 2676
Extracting 2677
Extracting 2678
Extracting 2680
Extracting 2681
Extracting 2682
Extracting 2683
Extracting 2685
Extracting 2686
Extracting 2687
Extracting 2688
Extracting 2690
Extracting 2691
Extracting 2692
Extracting 2693
Extracting 2695
Extracting 2696
Extracting 2697
Extracting 2698
Extracting 2700
Extracting 2701
Extracting 2702
Extracting 2703
Extracting 2705
Extracting 2706
Extracting 2707
Extracting 2708
Extracting 2710
Extracting 2711
Extracting 2712
Extracting 2714
Extracting 2716
Extracting 2718
Extracting 2720
Extracting 2722
Extracting 2724
Extracting 2726
Extracting 2727
Extracting 2728
Extracting 2730
Extracting 2732
Extracting 2734
Extracting 2735
Extracting 2737
Extracting 2739
Extracting 2741
Extracting 2743
Extracting 2745
Extracting 2747
Extracting 2749
Extracting 2751
Extracting 2753
Extracting 2755
Extracting 2756
Extracting 2758
Extracting 2759
Extracting 2761
Extracting 2762
Extracting 2764
Extracting 2765
Extracting 2767
Extracting 2768
Extracting 2770
Extracting 2771
Extracting 2773
Extracting 2774
Extracting 2776
Extracting 2777
Extracting 2779
Extracting 2781
Extracting 2783
Extracting 2784
Extracting 2785
Extracting 2787
Extracting 2789
Extracting 2791
Extracting 2793
Extracting 2795
Extracting 2797
Extracting 2799
Extracting 2800
Extracting 2802
Extracting 2803
Extracting 2804
Extracting 2806
Extracting 2807
Extracting 2808
Extracting 2809
Extracting 2811
Extracting 2812
Extracting 2813
Extracting 2814
Extracting 2816
Extracting 2817
Extracting 2818
Extracting 2819
Extracting 2821
Extracting 2822
Extracting 2823
Extracting 2824
Extracting 2826
Extracting 2827
Extracting 2828
Extracting 2829
Extracting 2831
Extracting 2832
Extracting 2833
Extracting 2835
Extracting 2836
Extracting 2837
Extracting 2838
Extracting 2840
Extracting 2841
Extracting 2842
Extracting 2843
Extracting 2845
Extracting 2846
Extracting 2847
Extracting 2848
Extracting 2850
Extracting 2851
Extracting 2852
Extracting 2853
Extracting 2855
Extracting 2856
Extracting 2857
Extracting 2858
Extracting 2860
Extracting 2861
Extracting 2862
Extracting 2863
Extracting 2865
Extracting 2866
Extracting 2867
Extracting 2868
Extracting 2870
Extracting 2871
Extracting 2872
Extracting 2873
Extracting 2875
Extracting 2876
Extracting 2877
Extracting 2878
Extracting 2880
Extracting 2881
Extracting 2882
Extracting 2884
Extracting 2885
Extracting 2886
Extracting 2887
Extracting 2889
Extracting 2890
Extracting 2891
Extracting 2892
Extracting 2894
Extracting 2895
Extracting 2896
Extracting 2897
Extracting 2899
Extracting 2900
Extracting 2901
Extracting 2902
Extracting 2904
Extracting 2905
Extracting 2906
Extracting 2907
Extracting 2909
Extracting 2910
Extracting 2911
Extracting 2913
Extracting 2914
Extracting 2915
Extracting 2916
Extracting 2918
Extracting 2919
Extracting 2920
Extracting 2921
Extracting 2923
Extracting 2924
Extracting 2925
Extracting 2926
Extracting 2928
Extracting 2929
Extracting 2930
Extracting 2931
Extracting 2933
Extracting 2934
Extracting 2935
Extracting 2936
Extracting 2938
Extracting 2939
Extracting 2940
Extracting 2941
Extracting 2943
Extracting 2944
Extracting 2945
Extracting 2946
Extracting 2948
Extracting 2949
Extracting 2950
Extracting 2951
Extracting 2953
Extracting 2954
Extracting 2955
Extracting 2956
Extracting 2958
Extracting 2959
Extracting 2960
Extracting 2961
Extracting 2963
Extracting 2964
Extracting 2965
Extracting 2966
Extracting 2968
Extracting 2969
Extracting 2970
Extracting 2971
Extracting 2973
Extracting 2974
Extracting 2975
Extracting 2976
Extracting 2978
Extracting 2979
Extracting 2980
Extracting 2981
Extracting 2983
Extracting 2984
Extracting 2985
Extracting 2986
Extracting 2988
Extracting 2989
Extracting 2990
Extracting 2991
Extracting 2993
Extracting 2994
Extracting 2995
Extracting 2997
Extracting 2999
Extracting 3001
Extracting 3003
Extracting 3005
Extracting 3007
Extracting 3009
Extracting 3010
Extracting 3011
Extracting 3013
Extracting 3015
Extracting 3017
Extracting 3018
Extracting 3020
Extracting 3022
Extracting 3024
Extracting 3026
Extracting 3028
Extracting 3030
Extracting 3032
Extracting 3034
Extracting 3036
Extracting 3038
Extracting 3039
Extracting 3040
Extracting 3042
Extracting 3043
Extracting 3045
Extracting 3046
Extracting 3047
Extracting 3049
Extracting 3050
Extracting 3051
Extracting 3053
Extracting 3054
Extracting 3055
Extracting 3057
Extracting 3058
Extracting 3060
Extracting 3061
Extracting 3062
Extracting 3064
Extracting 3065
Extracting 3067
Extracting 3068
Extracting 3069
Extracting 3071
Extracting 3072
Extracting 3073
Extracting 3075
Extracting 3076
Extracting 3077
Extracting 3079
Extracting 3080
Extracting 3082
Extracting 3083
Extracting 3085
Extracting 3086
Extracting 3088
Extracting 3089
Extracting 3091
Extracting 3092
Extracting 3094
Extracting 3095
Extracting 3097
Extracting 3098
Extracting 3099
Extracting 3101
Extracting 3102
Extracting 3104
Extracting 3105
Extracting 3106
Extracting 3108
Extracting 3109
Extracting 3111
Extracting 3112
Extracting 3113
Extracting 3115
Extracting 3116
Extracting 3117
Extracting 3119
Extracting 3120
Extracting 3121
Extracting 3123
Extracting 3124
Extracting 3125
Extracting 3127
Extracting 3128
Extracting 3129
Extracting 3131
Extracting 3132
Extracting 3133
Extracting 3135
Extracting 3136
Extracting 3138
Extracting 3139
Extracting 3140
Extracting 3142
Extracting 3143
Extracting 3144
Extracting 3146
Extracting 3147
Extracting 3148
Extracting 3150
Extracting 3151
Extracting 3153
Extracting 3154
Extracting 3155
Extracting 3157
Extracting 3158
Extracting 3160
Extracting 3161
Extracting 3162
Extracting 3164
Extracting 3165
Extracting 3166
Extracting 3168
Extracting 3169
Extracting 3170
Extracting 3172
Extracting 3173
Extracting 3175
Extracting 3176
Extracting 3178
Extracting 3179
Extracting 3181
Extracting 3182
Extracting 3184
Extracting 3185
Extracting 3187
Extracting 3188
Extracting 3190
Extracting 3191
Extracting 3192
Extracting 3194
Extracting 3195
Extracting 3197
Extracting 3198
Extracting 3199
Extracting 3201
Extracting 3202
Extracting 3204
Extracting 3205
Extracting 3206
Extracting 3208
Extracting 3209
Extracting 3210
Extracting 3212
Extracting 3213
Extracting 3214
Extracting 3216
Extracting 3217
Extracting 3218
Extracting 3220
Extracting 3221
Extracting 3222
Extracting 3224
Extracting 3225
Extracting 3226
Extracting 3228
Extracting 3229
Extracting 3231
Extracting 3232
Extracting 3233
Extracting 3235
Extracting 3236
Extracting 3237
Extracting 3239
Extracting 3240
Extracting 3241
Extracting 3243
Extracting 3244
Extracting 3246
Extracting 3247
Extracting 3248
Extracting 3250
Extracting 3251
Extracting 3253
Extracting 3254
Extracting 3255
Extracting 3257
Extracting 3258
Extracting 3259
Extracting 3261
Extracting 3262
Extracting 3263
Extracting 3265
Extracting 3266
Extracting 3268
Extracting 3269
Extracting 3271
Extracting 3272
Extracting 3274
Extracting 3275
Extracting 3277
Extracting 3278
Extracting 3280
Extracting 3281
Extracting 3283
Extracting 3284
Extracting 3285
Extracting 3287
Extracting 3288
Extracting 3290
Extracting 3291
Extracting 3292
Extracting 3294
Extracting 3295
Extracting 3297
Extracting 3298
Extracting 3299
Extracting 3301
Extracting 3302
Extracting 3303
Extracting 3305
Extracting 3306
Extracting 3307
Extracting 3309
Extracting 3310
Extracting 3311
Extracting 3313
Extracting 3314
Extracting 3315
Extracting 3317
Extracting 3318
Extracting 3320
Extracting 3321
Extracting 3323
Extracting 3324
Extracting 3326
Extracting 3327
Extracting 3329
Extracting 3330
Extracting 3332
Extracting 3333
Extracting 3335
Extracting 3336
Extracting 3338
Extracting 3339
Extracting 3341
Extracting 3342
Extracting 3344
Extracting 3345
Extracting 3347
Extracting 3348
Extracting 3350
Extracting 3351
Extracting 3353
Extracting 3354
Extracting 3356
Extracting 3357
Extracting 3359
Extracting 3360
Extracting 3362
Extracting 3364
Extracting 3366
Extracting 3367
Extracting 3369
Extracting 3370
Extracting 3372
Extracting 3373
Extracting 3375
Extracting 3376
Extracting 3378
Extracting 3379
Extracting 3381
Extracting 3382
Extracting 3384
Extracting 3385
Extracting 3387
Extracting 3388
Extracting 3390
Extracting 3392
Extracting 3394
Extracting 3395
Extracting 3397
Extracting 3398
Extracting 3400
Extracting 3401
Extracting 3403
Extracting 3404
Extracting 3406
Extracting 3407
Extracting 3409
Extracting 3411
Extracting 3412
Extracting 3414
Extracting 3416
Extracting 3418
Extracting 3420
Extracting 3421
Extracting 3423
Extracting 3424
Extracting 3426
Extracting 3427
Extracting 3429
Extracting 3430
Extracting 3432
Extracting 3433
Extracting 3435
Extracting 3436
Extracting 3438
Extracting 3439
Extracting 3441
Extracting 3442
Extracting 3444
Extracting 3446
Extracting 3448
Extracting 3449
Extracting 3451
Extracting 3452
Extracting 3454
Extracting 3455
Extracting 3457
Extracting 3458
Extracting 3460
Extracting 3461
Extracting 3463
Extracting 3464
Extracting 3466
Extracting 3467
Extracting 3469
Extracting 3470
Extracting 3472
Extracting 3473
Extracting 3475
Extracting 3476
Extracting 3478
Extracting 3479
Extracting 3481
Extracting 3482
Extracting 3484
Extracting 3485
Extracting 3487
Extracting 3488
Extracting 3490
Extracting 3491
Extracting 3493
Extracting 3494
Extracting 3495
Extracting 3497
Extracting 3498
Extracting 3499
Extracting 3500
Extracting 3502
Extracting 3503
Extracting 3504
Extracting 3505
Extracting 3507
Extracting 3508
Extracting 3509
Extracting 3510
Extracting 3512
Extracting 3513
Extracting 3514
Extracting 3515
Extracting 3517
Extracting 3518
Extracting 3519
Extracting 3520
Extracting 3522
Extracting 3523
Extracting 3524
Extracting 3525
Extracting 3527
Extracting 3528
Extracting 3529
Extracting 3530
Extracting 3532
Extracting 3533
Extracting 3534
Extracting 3535
Extracting 3537
Extracting 3538
Extracting 3539
Extracting 3540
Extracting 3542
Extracting 3543
Extracting 3544
Extracting 3545
Extracting 3547
Extracting 3548
Extracting 3549
Extracting 3550
Extracting 3552
Extracting 3553
Extracting 3554
Extracting 3555
Extracting 3557
Extracting 3558
Extracting 3559
Extracting 3560
Extracting 3562
Extracting 3563
Extracting 3564
Extracting 3565
Extracting 3567
Extracting 3568
Extracting 3569
Extracting 3570
Extracting 3572
Extracting 3573
Extracting 3574
Extracting 3575
Extracting 3577
Extracting 3578
Extracting 3579
Extracting 3580
Extracting 3582
Extracting 3583
Extracting 3584
Extracting 3585
Extracting 3587
Extracting 3588
Extracting 3589
Extracting 3590
Extracting 3592
Extracting 3593
Extracting 3594
Extracting 3595
Extracting 3597
Extracting 3598
Extracting 3599
Extracting 3600
Extracting 3602
Extracting 3603
Extracting 3604
Extracting 3605
Extracting 3607
Extracting 3608
Extracting 3609
Extracting 3610
Extracting 3612
Extracting 3613
Extracting 3614
Extracting 3615
Extracting 3617
Extracting 3618
Extracting 3619
Extracting 3620
Extracting 3622
Extracting 3623
Extracting 3624
Extracting 3625
Extracting 3627
Extracting 3628
Extracting 3629
Extracting 3630
Extracting 3632
Extracting 3633
Extracting 3634
Extracting 3635
Extracting 3637
Extracting 3638
Extracting 3639
Extracting 3640
Extracting 3642
Extracting 3643
Extracting 3644
Extracting 3645
Extracting 3647
Extracting 3648
Extracting 3649
Extracting 3650
Extracting 3652
Extracting 3653
Extracting 3654
Extracting 3655
Extracting 3657
Extracting 3658
Extracting 3659
Extracting 3660
Extracting 3662
Extracting 3663
Extracting 3664
Extracting 3665
Extracting 3667
Extracting 3668
Extracting 3669
Extracting 3670
Extracting 3672
Extracting 3673
Extracting 3674
Extracting 3675
Extracting 3677
Extracting 3678
Extracting 3679
Extracting 3680
Extracting 3682
Extracting 3683
Extracting 3684
Extracting 3685
Extracting 3687
Extracting 3688
Extracting 3689
Extracting 3690
Extracting 3692
Extracting 3693
Extracting 3694
Extracting 3695
Extracting 3697
Extracting 3698
Extracting 3699
Extracting 3700
Extracting 3702
Extracting 3703
Extracting 3704
Extracting 3705
Extracting 3707
Extracting 3708
Extracting 3709
Extracting 3710
Extracting 3712
Extracting 3713
Extracting 3714
Extracting 3715
Extracting 3717
Extracting 3718
Extracting 3719
Extracting 3720
Extracting 3722
Extracting 3723
Extracting 3724
Extracting 3725
Extracting 3727
Extracting 3728
Extracting 3729
Extracting 3730
Extracting 3732
Extracting 3733
Extracting 3734
Extracting 3735
Extracting 3737
Extracting 3738
Extracting 3739
Extracting 3740
Extracting 3742
Extracting 3743
Extracting 3744
Extracting 3745
Extracting 3747
Extracting 3748
Extracting 3749
Extracting 3750
Extracting 3752
Extracting 3753
Extracting 3754
Extracting 3755
Extracting 3757
Extracting 3758
Extracting 3759
Extracting 3760
Extracting 3762
Extracting 3763
Extracting 3764
Extracting 3765
Extracting 3767
Extracting 3768
Extracting 3769
Extracting 3770
Extracting 3772
Extracting 3773
Extracting 3774
Extracting 3775
Extracting 3777
Extracting 3778
Extracting 3779
Extracting 3780
Extracting 3782
Extracting 3783
Extracting 3784
Extracting 3785
Extracting 3787
Extracting 3788
Extracting 3789
Extracting 3790
Extracting 3792
Extracting 3793
Extracting 3794
Extracting 3795
Extracting 3797
Extracting 3798
Extracting 3799
Extracting 3800
Extracting 3802
Extracting 3803
Extracting 3804
Extracting 3805
Extracting 3807
Extracting 3808
Extracting 3809
Extracting 3810
Extracting 3812
Extracting 3813
Extracting 3814
Extracting 3815
Extracting 3817
Extracting 3818
Extracting 3819
Extracting 3820
Extracting 3822
Extracting 3823
Extracting 3824
Extracting 3825
Extracting 3827
Extracting 3828
Extracting 3829
Extracting 3830
Extracting 3832
Extracting 3833
Extracting 3834
Extracting 3835
Extracting 3837
Extracting 3838
Extracting 3839
Extracting 3841
Extracting 3842
Extracting 3843
Extracting 3845
Extracting 3846
Extracting 3847
Extracting 3849
Extracting 3850
Extracting 3851
Extracting 3853
Extracting 3854
Extracting 3855
Extracting 3857
Extracting 3859
Extracting 3861
Extracting 3863
Extracting 3865
Extracting 3867
Extracting 3869
Extracting 3871
Extracting 3873
Extracting 3874
Extracting 3876
Extracting 3878
Extracting 3880
Extracting 3882
Extracting 3884
Extracting 3885
Extracting 3887
Extracting 3888
Extracting 3889
Extracting 3890
Extracting 3892
Extracting 3893
Extracting 3894
Extracting 3896
Extracting 3898
Extracting 3899
Extracting 3901
Extracting 3902
Extracting 3903
Extracting 3904
Extracting 3906
Extracting 3907
Extracting 3908
Extracting 3909
Extracting 3911
Extracting 3912
Extracting 3913
Extracting 3914
Extracting 3916
Extracting 3917
Extracting 3918
Extracting 3920
Extracting 3922
Extracting 3924
Extracting 3926
Extracting 3927
Extracting 3929
Extracting 3930
Extracting 3931
Extracting 3933
Extracting 3934
Extracting 3935
Extracting 3937
Extracting 3938
Extracting 3939
Extracting 3941
Extracting 3942
Extracting 3944
Extracting 3946
Extracting 3948
Extracting 3950
Extracting 3952
Extracting 3954
Extracting 3955
Extracting 3956
Extracting 3958
Extracting 3959
Extracting 3960
Extracting 3962
Extracting 3963
Extracting 3964
Extracting 3965
Extracting 3967
Extracting 3968
Extracting 3969
Extracting 3971
Extracting 3972
Extracting 3973
Extracting 3975
Extracting 3976
Extracting 3977
Extracting 3979
Extracting 3980
Extracting 3982
Extracting 3984
Extracting 3986
Extracting 3988
Extracting 3990
Extracting 3992
Extracting 3994
Extracting 3996
Extracting 3998
Extracting 4000
Extracting 4002
Extracting 4004
Extracting 4006
Extracting 4008
Extracting 4009
Extracting 4011
Extracting 4012
Extracting 4014
Extracting 4016
Extracting 4018
Extracting 4020
Extracting 4021
Extracting 4023
Extracting 4024
Extracting 4026
Extracting 4028
Extracting 4030
Extracting 4031
Extracting 4032
Extracting 4034
Extracting 4035
Extracting 4036
Extracting 4038
Extracting 4039
Extracting 4040
Extracting 4042
Extracting 4043
Extracting 4044
Extracting 4046
Extracting 4047
Extracting 4048
Extracting 4050
Extracting 4051
Extracting 4052
Extracting 4054
Extracting 4055
Extracting 4056
Extracting 4058
Extracting 4059
Extracting 4060
Extracting 4061
Extracting 4063
Extracting 4064
Extracting 4065
Extracting 4066
Extracting 4068
Extracting 4069
Extracting 4070
Extracting 4071
Extracting 4073
Extracting 4074
Extracting 4075
Extracting 4076
Extracting 4078
Extracting 4079
Extracting 4080
Extracting 4081
Extracting 4083
Extracting 4084
Extracting 4085
Extracting 4086
Extracting 4088
Extracting 4089
Extracting 4090
Extracting 4091
Extracting 4093
Extracting 4094
Extracting 4095
Extracting 4096
Extracting 4098
Extracting 4099
Extracting 4100
Extracting 4102
Extracting 4103
Extracting 4105
Extracting 4106
Extracting 4108
Extracting 4109
Extracting 4111
Extracting 4112
Extracting 4114
Extracting 4115
Extracting 4117
Extracting 4118
Extracting 4120
Extracting 4121
Extracting 4123
Extracting 4124
Extracting 4126
Extracting 4128
Extracting 4130
Extracting 4132
Extracting 4134
Extracting 4136
Extracting 4137
Extracting 4138
Extracting 4139
Extracting 4141
Extracting 4142
Extracting 4143
Extracting 4144
Extracting 4146
Extracting 4147
Extracting 4148
Extracting 4149
Extracting 4151
Extracting 4152
Extracting 4153
Extracting 4154
Extracting 4156
Extracting 4157
Extracting 4158
Extracting 4159
Extracting 4161
Extracting 4162
Extracting 4163
Extracting 4164
Extracting 4166
Extracting 4167
Extracting 4168
Extracting 4169
Extracting 4171
Extracting 4172
Extracting 4173
Extracting 4174
Extracting 4176
Extracting 4177
Extracting 4178
Extracting 4179
Extracting 4181
Extracting 4182
Extracting 4183
Extracting 4184
Extracting 4186
Extracting 4187
Extracting 4188
Extracting 4189
Extracting 4191
Extracting 4192
Extracting 4193
Extracting 4194
Extracting 4196
Extracting 4198
Extracting 4200
Extracting 4202
Extracting 4204
Extracting 4206
Extracting 4207
Extracting 4209
Extracting 4210
Extracting 4211
Extracting 4212
Extracting 4214
Extracting 4215
Extracting 4216
Extracting 4217
Extracting 4219
Extracting 4220
Extracting 4221
Extracting 4222
Extracting 4224
Extracting 4225
Extracting 4226
Extracting 4227
Extracting 4229
Extracting 4230
Extracting 4231
Extracting 4232
Extracting 4234
Extracting 4235
Extracting 4236
Extracting 4237
Extracting 4239
Extracting 4240
Extracting 4241
Extracting 4243
Extracting 4245
Extracting 4247
Extracting 4249
Extracting 4251
Extracting 4253
Extracting 4254
Extracting 4256
Extracting 4258
Extracting 4260
Extracting 4262

In [46]:
agg.loc[f_nr] = output.groupby('r')[cols].mean().loc[1.0]

In [62]:
agg_1.head()


Out[62]:
next_10_num_nan next_10_num_sum previous_10_num_nan previous_10_num_sum next_20_num_nan next_20_num_sum previous_20_num_nan previous_20_num_sum next_50_num_nan next_50_num_sum previous_50_num_nan previous_50_num_sum next_100_num_nan next_100_num_sum previous_100_num_nan previous_100_num_sum
0 5.0336 0.156901 4.97031 0.15677 10.0685 0.258038 9.93376 0.258038 25.0003 0.525499 24.9839 0.525499 50.0119 0.852273 50.0502 0.852273
2 5.02746 0.150347 4.97338 0.150305 10.059 0.251386 10.0083 0.251386 24.9884 0.527439 24.9637 0.527439 49.9728 0.851441 50.0646 0.851441
4 4.93014 0.150263 5.03271 0.150263 9.95011 0.24612 10.0255 0.24612 25.0097 0.52439 25.0416 0.52439 49.9584 0.851718 50.0928 0.851718
6 5.00915 0.146105 4.98002 0.146267 9.9842 0.255266 10.0158 0.255266 25.0086 0.531596 25.0635 0.531596 49.9271 0.861419 50.0807 0.861419
8 5.0186 0.151582 4.97335 0.151582 10.0396 0.255266 9.99335 0.255266 24.9706 0.539911 25.0449 0.539911 49.8927 0.860865 50.0676 0.860865

In [63]:
agg_0.head()


Out[63]:
next_10_num_nan next_10_num_sum previous_10_num_nan previous_10_num_sum next_20_num_nan next_20_num_sum previous_20_num_nan previous_20_num_sum next_50_num_nan next_50_num_sum previous_50_num_nan previous_50_num_sum next_100_num_nan next_100_num_sum previous_100_num_nan previous_100_num_sum
0 4.99807 0.0262271 4.99845 0.0258643 10.0058 0.052798 10.0066 0.0520729 25.0174 0.131834 25.0176 0.131746 50.0244 0.264294 50.0242 0.2645
2 4.99952 0.0262613 4.99984 0.0259626 10.0057 0.0524324 10.006 0.0521594 25.0155 0.131932 25.0157 0.131799 50.0249 0.264221 50.0245 0.264715
4 4.99865 0.0259489 4.99813 0.0265015 10.008 0.0523683 10.0077 0.0527741 25.0144 0.131529 25.0144 0.131701 50.0247 0.264067 50.0242 0.264791
6 4.99597 0.0262269 4.99614 0.0260998 10.0073 0.0523713 10.0071 0.0525414 25.0154 0.131373 25.0152 0.131668 50.0259 0.264081 50.0251 0.264907
8 4.99688 0.0262244 4.99714 0.0259809 10.0054 0.0524922 10.0057 0.052243 25.0167 0.131428 25.0163 0.131828 50.0233 0.264154 50.0223 0.265095

In [71]:
(agg_1 / agg_0).filter(like='_sum').mean()


Out[71]:
next_10_num_sum         7.607240
previous_10_num_sum     8.261421
next_20_num_sum         5.647394
previous_20_num_sum     5.712329
next_50_num_sum         3.965427
previous_50_num_sum     3.987973
next_100_num_sum        2.954539
previous_100_num_sum    2.960948
dtype: float64

In [72]:
(agg_1 / agg_0)['previous_10_num_sum'].plot(linestyle='None', marker='.')


Out[72]:
<matplotlib.axes._subplots.AxesSubplot at 0x12bab48d0>

In [67]:
(agg_1 / agg_0)['previous_20_num_sum'].plot(linestyle='None', marker='.')


Out[67]:
<matplotlib.axes._subplots.AxesSubplot at 0x12b4f8a50>

In [70]:
(agg_1 / agg_0).sort_values('previous_20_num_sum', ascending=False)


Out[70]:
next_10_num_nan next_10_num_sum previous_10_num_nan previous_10_num_sum next_20_num_nan next_20_num_sum previous_20_num_nan previous_20_num_sum next_50_num_nan next_50_num_sum previous_50_num_nan previous_50_num_sum next_100_num_nan next_100_num_sum previous_100_num_nan previous_100_num_sum
2720 1.60182 75.5 0.501185 352.333 1.25172 50.3333 0.90402 75.5 1.12453 22.9783 1.06839 24.5814 1.08497 10.1635 0.962558 11.4891
2718 1.40345 81.3077 0.602222 211.4 1.25018 50.3333 0.902476 75.5 1.18755 21.14 0.989795 26.425 1.12424 9.69725 0.911155 12.0114
2712 1.30568 176.083 1.30568 176.083 1.20605 66.0625 1.15734 70.4667 1.04649 22.0208 1.02835 22.4894 1.11541 10.0667 0.951887 11.8764
2714 1.49674 88.0417 0.698941 264.125 1.19836 55.6316 1.00142 70.4667 1.08544 21.14 0.988397 23.4889 1.12508 10.3627 0.982653 12.0114
2726 1.09969 105.55 0.900427 131.938 1.20319 52.85 0.95379 70.4667 1.16756 20.3269 0.948235 25.7805 1.12527 9.9717 0.941562 12.0114
2722 1.29497 117.389 0.996886 176.083 1.05149 50.3333 0.903033 58.7222 1.12522 20.7255 0.967524 24.5814 1.07419 10.1635 0.962152 11.3656
2724 1.10313 132.062 1.10366 132.062 1.04841 66.0625 1.15136 58.7222 1.08625 22.9783 1.06996 23.4889 1.09543 10.3627 0.982954 11.6154
2716 0.806642 132.062 1.11051 96.0455 0.80407 75.5 1.25995 45.9565 0.884956 25.1667 1.15036 19.2182 1.0234 10.7857 1.02288 10.7857
902 0.934759 22.54 0.948526 22.2189 0.958585 19.1384 0.969639 18.9194 1.03525 10.3134 0.967014 11.0922 0.997996 5.44253 0.978364 5.55458
839 0.962693 23.806 0.957651 23.9464 0.969265 19.018 0.976333 18.8805 1.04694 10.4105 0.976518 11.2314 1.00379 5.42066 0.974184 5.58833
867 0.924853 25.9809 0.973447 24.6409 0.967403 17.9892 0.989233 17.5769 1.02357 10.6523 0.997724 10.9565 0.999835 5.41214 0.988966 5.47462
844 0.919892 25.1389 0.944364 24.4944 0.998043 16.3714 0.984964 16.6118 1.02055 10.7901 0.992708 11.1227 0.997447 5.42793 0.975715 5.55153
816 0.966224 25.1691 0.99112 24.5314 1.01077 16.1643 0.988254 16.5726 0.983229 11.619 0.990329 11.5407 0.987579 6.71336 0.988216 6.71336
2506 1.35609 12.747 0.945351 20.5913 1.18667 13.0701 0.968318 16.4885 1.10378 5.15264 0.970335 5.88874 1.05685 5.19007 0.978844 5.59661
1041 0.903515 24.6595 0.938595 23.7901 0.968297 16.0572 0.962558 16.1627 1.00066 7.21916 0.969002 7.45762 0.988185 3.82925 0.984723 3.84487
892 0.96307 21.0988 0.998303 20.3038 0.966498 17.0371 1.02191 16.0463 1.01494 10.8267 0.996214 11.052 0.998952 5.52103 0.984377 5.60601
829 0.938525 22.2765 0.979093 21.3245 1.03252 15.3294 0.999231 15.9092 1.01506 10.6461 0.988682 10.9536 0.995843 5.49575 0.988004 5.54241
1026 0.886556 19.5247 0.953083 18.2631 0.949191 16.46 0.985923 15.8504 1.00575 7.49433 0.97281 7.75342 0.977003 3.79487 0.9755 3.80311
834 0.916427 25.004 0.978421 23.3794 0.997342 16.3173 1.02452 15.8402 1.0096 10.6042 0.993532 10.793 0.990065 5.44508 0.986848 5.46583
857 0.948111 20.1918 0.956484 20.0187 0.967047 16.5972 1.01165 15.8203 1.02683 10.5566 0.988785 11.0014 1.00106 5.42502 0.974924 5.57294
877 0.924388 25.0019 0.978339 23.5773 0.993358 16.1953 1.01785 15.771 1.01178 10.5923 0.99248 10.8178 0.99224 5.4377 0.98553 5.47776
897 0.972822 18.1498 0.981172 17.992 1.00563 15.5844 0.997918 15.7221 1.016 10.6342 0.996582 10.8615 0.992152 5.48375 0.993729 5.47776
862 0.92134 21.0158 0.969903 19.9621 0.968259 16.3845 1.01421 15.5943 1.01574 10.6282 0.986831 10.9663 0.995969 5.43814 0.977374 5.54393
2514 1.25773 0 0.820508 0 1.12421 11.9056 0.868211 15.3071 1.08128 5.33085 1.00518 5.76075 1.05645 3.93934 0.994653 4.18555
882 0.937693 21.0581 1.01848 19.2785 1.00403 14.8932 1.01778 14.6754 1.01325 10.7183 0.995015 10.9346 0.993934 5.49873 0.988551 5.5318
1046 0.866192 21.402 0.917042 20.358 0.913042 15.6556 0.978102 14.662 1.01127 7.53232 0.978346 7.7941 0.980547 3.78433 0.972738 3.81614
872 0.92682 20.1291 0.978077 19.0637 1.00106 14.3811 0.986654 14.6114 1.01273 10.7002 0.993915 10.922 1.00121 5.43918 0.985899 5.52753
1036 0.900728 20.8222 0.934723 20.0934 0.930371 15.1418 0.987607 14.2861 1.00273 7.45204 0.967805 7.7232 0.982283 3.81139 0.980035 3.82209
1021 0.924835 23.8803 0.934682 23.6461 0.970355 13.8552 0.945608 14.2163 1.01477 7.49905 0.974273 7.81974 0.975484 3.82209 0.982825 3.79722
810 0.95218 17.9259 0.938176 18.1878 0.97084 13.4676 0.945239 13.8272 0.981334 9.85193 0.96494 10.0194 0.998705 6.04498 0.965413 6.16571
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4182 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4184 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4186 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4187 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4189 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4191 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4192 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4194 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4207 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4209 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4210 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4212 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4214 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4215 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4217 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4219 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4220 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4222 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4224 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4225 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4227 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4229 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4230 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4232 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4234 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4235 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4237 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4239 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4240 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4254 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

3139 rows × 16 columns

Create features based on lag/lead features


In [25]:
# Create shortlist
cols = ['next_10_num_sum', 'next_20_num_sum', 'next_50_num_sum', 'next_100_num_sum']
shortlist = ["L3_S33_F3857",   "L3_S32_F3850", 
"L3_S33_F3859",  "L3_S29_F3354","L3_S30_F3754", "L3_S30_F3759",  "L3_S30_F3749", "L3_S33_F3865", "L3_S30_F3804", 
"L3_S29_F3348", 
"L3_S29_F3351", "L3_S29_F3345",   "L3_S30_F3554",  
"L3_S29_F3321", "L3_S30_F3494", "L3_S29_F3342",   "L3_S29_F3373", 
"L3_S29_F3324",   "L3_S30_F3769", "L3_S29_F3427",  "L3_S30_F3829"]

output10 = pd.DataFrame(index=ids_all.Id, columns = shortlist)
output20 = pd.DataFrame(index=ids_all.Id, columns = shortlist)
output50 = pd.DataFrame(index=ids_all.Id, columns = shortlist)
output100 = pd.DataFrame(index=ids_all.Id, columns = shortlist)

for f_name in shortlist:
    # Read local error rate file for specific features
    # Extract single column
    # Add column to dataframe
    f_nr = lut[lut['name_num']==f_name].feature_nr.values[0]
    print('Extracting {} (nr {})'.format(f_name, f_nr))
    #print f_nr
    f_file = os.path.join(const.BASE_PATH, 
                          'feat_set_next_previous_num_feat_nr_{}.csv'.format(f_nr))
    
    output_f = pd.read_csv(f_file, index_col='ID')
    
    output10[f_name] = output_f['next_10_num_sum']
    output20[f_name] = output_f['next_20_num_sum']
    output50[f_name] = output_f['next_50_num_sum']
    output100[f_name] = output_f['next_100_num_sum']


Extracting L3_S33_F3857 (nr 3857)
Extracting L3_S32_F3850 (nr 3850)
Extracting L3_S33_F3859 (nr 3859)
Extracting L3_S29_F3354 (nr 3354)
Extracting L3_S30_F3754 (nr 3754)
Extracting L3_S30_F3759 (nr 3759)
Extracting L3_S30_F3749 (nr 3749)
Extracting L3_S33_F3865 (nr 3865)
Extracting L3_S30_F3804 (nr 3804)
Extracting L3_S29_F3348 (nr 3348)
Extracting L3_S29_F3351 (nr 3351)
Extracting L3_S29_F3345 (nr 3345)
Extracting L3_S30_F3554 (nr 3554)
Extracting L3_S29_F3321 (nr 3321)
Extracting L3_S30_F3494 (nr 3494)
Extracting L3_S29_F3342 (nr 3342)
Extracting L3_S29_F3373 (nr 3373)
Extracting L3_S29_F3324 (nr 3324)
Extracting L3_S30_F3769 (nr 3769)
Extracting L3_S29_F3427 (nr 3427)
Extracting L3_S30_F3829 (nr 3829)

In [26]:
output10.to_csv(os.path.join(const.DATA_PATH,'feat_set_next_previous_num_all_10.csv'), index_label='ID')
output20.to_csv(os.path.join(const.DATA_PATH,'feat_set_next_previous_num_all_20.csv'), index_label='ID')
output50.to_csv(os.path.join(const.DATA_PATH,'feat_set_next_previous_num_all_50.csv'), index_label='ID')
output100.to_csv(os.path.join(const.DATA_PATH,'feat_set_next_previous_num_all_100.csv'), index_label='ID')

In [92]:
output100.head()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-92-ddc44ba68bf6> in <module>()
----> 1 output100.head()

NameError: name 'output100' is not defined

In [91]:
output10.shape


Out[91]:
(2367495, 21)

In [88]:
output10['L3_S33_F3859'].value_counts()


Out[88]:
0.0    2176266
1.0      46596
2.0       3510
3.0        425
4.0        108
5.0         15
6.0          8
7.0          2
Name: L3_S33_F3859, dtype: int64

In [ ]: