By: 顾 瞻 GU Zhan (Sam)

July 2017

[1] Import useful reference packages


In [1]:
# from __future__ import print_function, division
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import pandas as pd
import operator
from scipy import interp
from itertools import cycle
from sklearn import svm
from sklearn.utils.validation import check_random_state
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import roc_curve, auc
from statsmodels.graphics.mosaicplot import mosaic
print(__doc__)


Automatically created module for IPython interactive environment

Read raw data


In [2]:
df_history_ts_process = pd.read_csv('data/history_ts.csv') 
df_history_ts_process.tail()


Out[2]:
ccyy-mm time bid-price
1886 2017-07 11:29:56 92100
1887 2017-07 11:29:57 92100
1888 2017-07 11:29:58 92100
1889 2017-07 11:29:59 92200
1890 2017-07 11:30:00 92200

In [3]:
df_history_table_process = pd.read_csv('data/history_table.csv') 
df_history_table_process.tail()


Out[3]:
ccyy-mm volume-plate deal-price-low deal-price-avg deal-early-second volume-bidder
26 2017-03 10356 87800 87916 55 262010
27 2017-04 12196 89800 89850 59 252273
28 2017-05 10316 90100 90209 55 270197
29 2017-06 10312 89400 89532 45 244349
30 2017-07 10325 92200 92250 57 269189

[2] Data pre-porcessing

Explore and visualize data

Parameters


In [4]:
parm_calculate_base_price_second = 15 # Use the current month's bid-price as base-price at this seconds. Later to derive increment-price
parm_calculate_target_second = 7 # How many seconds in future to predict: target variable
parm_calculate_prev_bp = 15 # Number of previous price/increment to include, i.e. previous 2sec, 3sec, 4sec, 5sec ... 15sec
parm_calculate_mv = 15 # Number of  previous price/increment Moving Average to calculate, i.e. previous 2sec, 3sec, 4sec, 5sec ... 15sec
parm_calculate_prev_month = 3 # Number of previous month to include (need to remove earliest x month from training data)
print('parm_calculate_base_price_second : %3d seconds' % parm_calculate_base_price_second)
print('parm_calculate_target_second     : %3d seconds' % parm_calculate_target_second)
print('parm_calculate_prev_bp           : %3d seconds' % parm_calculate_prev_bp)
print('parm_calculate_mv                : %3d seconds' % parm_calculate_mv)
print('parm_calculate_prev_month        : %3d months' % parm_calculate_prev_month)

print('' )
parm_ts_cycle = 61 # seconds/records per month
print('parm_ts_cycle                    : %3d seconds' % parm_ts_cycle)
parm_ts_month = int(len(df_history_ts_process) / parm_ts_cycle)
print('parm_ts_month                    : %3d months' %  parm_ts_month)

parm_record_cut_row_head = max(parm_calculate_base_price_second, parm_calculate_prev_bp, parm_calculate_mv)
parm_record_cut_row_tail = parm_calculate_target_second
parm_record_cut_month_head = parm_calculate_prev_month + 1

parm_ts_valid_cycle = parm_ts_cycle - parm_record_cut_row_head - parm_record_cut_row_tail
print('parm_ts_valid_cycle              : %3d seconds' % parm_ts_valid_cycle)
parm_ts_valid_month = parm_ts_month - parm_record_cut_month_head
print('parm_ts_valid_month              : %3d months' % parm_ts_valid_month)

if parm_record_cut_month_head < 10:
    parm_record_cut_ccyy = pd.to_datetime('2015-0'+str(parm_record_cut_month_head))
else:
    parm_record_cut_ccyy = pd.to_datetime('2015-'+str(parm_record_cut_month_head))

print('' )
print('parm_record_cut_ccyy             : %s' % parm_record_cut_ccyy)

print('parm_record_cut_month_head       : %3d months' % parm_record_cut_month_head)
print('parm_record_cut_row_head         : %3d seconds' % parm_record_cut_row_head)
print('parm_record_cut_row_tail         : %3d seconds' % parm_record_cut_row_tail)
print('' )


parm_calculate_base_price_second :  15 seconds
parm_calculate_target_second     :   7 seconds
parm_calculate_prev_bp           :  15 seconds
parm_calculate_mv                :  15 seconds
parm_calculate_prev_month        :   3 months

parm_ts_cycle                    :  61 seconds
parm_ts_month                    :  31 months
parm_ts_valid_cycle              :  39 seconds
parm_ts_valid_month              :  27 months

parm_record_cut_ccyy             : 2015-04-01 00:00:00
parm_record_cut_month_head       :   4 months
parm_record_cut_row_head         :  15 seconds
parm_record_cut_row_tail         :   7 seconds


In [5]:
df_history_ts_process.head()


Out[5]:
ccyy-mm time bid-price
0 2015-01 11:29:00 74000
1 2015-01 11:29:01 74000
2 2015-01 11:29:02 74000
3 2015-01 11:29:03 74000
4 2015-01 11:29:04 74000

In [ ]:

Prepare derived features

Process: df_history_ts_process


In [6]:
# date of current month
df_history_ts_process['date-curr'] = df_history_ts_process.apply(lambda row: pd.to_datetime(row['ccyy-mm']), axis=1)

# date of previous month
df_history_ts_process['date-prev'] = df_history_ts_process.apply(lambda row: row['date-curr'] - pd.offsets.MonthBegin(1), axis=1)


# Year
df_history_ts_process['year'] = df_history_ts_process.apply(lambda row: row['ccyy-mm'][0:4], axis=1)

# Month
df_history_ts_process['month'] = df_history_ts_process.apply(lambda row: row['ccyy-mm'][5:7], axis=1)

# Hour
df_history_ts_process['hour'] = df_history_ts_process.apply(lambda row: row['time'][0:2], axis=1)

# Minute
df_history_ts_process['minute'] = df_history_ts_process.apply(lambda row: row['time'][3:5], axis=1)

# Second
df_history_ts_process['second'] = df_history_ts_process.apply(lambda row: row['time'][6:8], axis=1)


# datetime of current month
df_history_ts_process['datetime-curr'] = df_history_ts_process.apply(lambda row: str(row['date-curr']) + ' ' + row['time'], axis=1)

# datetime of previous month
df_history_ts_process['datetime-prev'] = df_history_ts_process.apply(lambda row: str(row['date-prev']) + ' ' + row['time'], axis=1)

In [7]:
df_history_ts_process.tail()


Out[7]:
ccyy-mm time bid-price date-curr date-prev year month hour minute second datetime-curr datetime-prev
1886 2017-07 11:29:56 92100 2017-07-01 2017-06-01 2017 07 11 29 56 2017-07-01 00:00:00 11:29:56 2017-06-01 00:00:00 11:29:56
1887 2017-07 11:29:57 92100 2017-07-01 2017-06-01 2017 07 11 29 57 2017-07-01 00:00:00 11:29:57 2017-06-01 00:00:00 11:29:57
1888 2017-07 11:29:58 92100 2017-07-01 2017-06-01 2017 07 11 29 58 2017-07-01 00:00:00 11:29:58 2017-06-01 00:00:00 11:29:58
1889 2017-07 11:29:59 92200 2017-07-01 2017-06-01 2017 07 11 29 59 2017-07-01 00:00:00 11:29:59 2017-06-01 00:00:00 11:29:59
1890 2017-07 11:30:00 92200 2017-07-01 2017-06-01 2017 07 11 30 00 2017-07-01 00:00:00 11:30:00 2017-06-01 00:00:00 11:30:00

In [8]:
# df_history_ts_process
# df_history_ts_process[1768:]

In [9]:
# new ['base-price']
gap = 1 # only one new feature/column

for gap in range(1, gap+1):
    col_name = 'base-price'+str(parm_calculate_base_price_second)+'sec'
    col_name_base_price = col_name
    col_data = pd.DataFrame(columns=[col_name])
    print('Creating : ', col_name)  

    for month in range(0, parm_ts_month):
        for i in range(0, parm_ts_cycle):
            col_data.loc[month*parm_ts_cycle+i] = df_history_ts_process['bid-price'][month*parm_ts_cycle+parm_calculate_base_price_second]
  
    df_history_ts_process[col_name] = col_data

print('Total records processed : ', len(col_data))


Creating :  base-price15sec
Total records processed :  1891

In [10]:
# df_history_ts_process
# df_history_ts_process[1768:]

In [11]:
# new ['increment-price'] = ['bid-price'] - ['base-price']

df_history_ts_process['increment-price'] = df_history_ts_process.apply(lambda row: row['bid-price'] - row[col_name_base_price], axis=1)

In [12]:
# df_history_ts_process
# df_history_ts_process[1768:]

In [13]:
plt.figure()
plt.plot(df_history_ts_process['bid-price'])
plt.plot(df_history_ts_process[col_name_base_price])
plt.plot()
plt.figure()
plt.plot(df_history_ts_process['increment-price'])
plt.plot()


Out[13]:
[]
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

['increment-price-target']


In [14]:
# previous N sec ['increment-price-target']

for gap in range(1, 2):
    col_name = 'increment-price-target'
    col_data = pd.DataFrame(columns=[col_name])
    print('Creating : ', col_name)  

    for month in range(0, parm_ts_month):
    #     print('month : ', month)
        for i in range(0, (parm_ts_cycle - parm_calculate_target_second)):
            col_data.loc[month*parm_ts_cycle+i] = df_history_ts_process['increment-price'][month*parm_ts_cycle+i+parm_calculate_target_second]
        for i in range((parm_ts_cycle - parm_calculate_target_second), parm_ts_cycle):
            col_data.loc[month*parm_ts_cycle+i] = 0
  
    df_history_ts_process[col_name] = col_data

print('Total records processed : ', len(col_data))


Creating :  increment-price-target
Total records processed :  1891

In [15]:
plt.figure()
plt.plot(df_history_ts_process['increment-price'])
plt.plot(df_history_ts_process['increment-price-target'])
plt.plot()

plt.figure()
plt.plot(df_history_ts_process['increment-price'][1768:])
plt.plot(df_history_ts_process['increment-price-target'][1768:])
plt.plot()


Out[15]:
[]
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

In [ ]:


In [16]:
# previous 'parm_calculate_prev_bp' sec ['increment-price']
gap = parm_calculate_prev_bp

for gap in range(1, gap+1):
    col_name = 'increment-price-prev'+str(gap)+'sec'
    col_data = pd.DataFrame(columns=[col_name])
#     col_data_zeros = pd.DataFrame({col_name: np.zeros(gap)})
    print('Creating : ', col_name)  

    for month in range(0, parm_ts_month):
    #     print('month : ', month)
#         col_data.append(col_data_zeros)
        for i in range(0, gap):
            col_data.loc[month*parm_ts_cycle+i] = 0
        for i in range(gap, parm_ts_cycle):
            col_data.loc[month*parm_ts_cycle+i] = df_history_ts_process['increment-price'][month*parm_ts_cycle+i-gap]
  
    df_history_ts_process[col_name] = col_data

print('Total records processed : ', len(col_data))


Creating :  increment-price-prev1sec
Creating :  increment-price-prev2sec
Creating :  increment-price-prev3sec
Creating :  increment-price-prev4sec
Creating :  increment-price-prev5sec
Creating :  increment-price-prev6sec
Creating :  increment-price-prev7sec
Creating :  increment-price-prev8sec
Creating :  increment-price-prev9sec
Creating :  increment-price-prev10sec
Creating :  increment-price-prev11sec
Creating :  increment-price-prev12sec
Creating :  increment-price-prev13sec
Creating :  increment-price-prev14sec
Creating :  increment-price-prev15sec
Total records processed :  1891

In [17]:
# previous 'parm_calculate_mv' sec Moving Average ['increment-price']

gap = parm_calculate_mv

for gap in range(2, gap+1): # MV starts from 2 seconds, till parm_calculate_mv
    col_name = 'increment-price-mv'+str(gap)+'sec'
    col_data = pd.DataFrame(columns=[col_name])
    print('Creating : ', col_name)  

    for month in range(0, parm_ts_month):
    #     print('month : ', month)
        for i in range(0, gap):
            col_data.loc[month*parm_ts_cycle+i] = 0
        for i in range(gap, parm_ts_cycle):
            col_data.loc[month*parm_ts_cycle+i] = \
            np.mean(df_history_ts_process['increment-price'][month*parm_ts_cycle+i-gap:month*parm_ts_cycle+i])
  
    df_history_ts_process[col_name] = col_data

print('Total records processed : ', len(col_data))


Creating :  increment-price-mv2sec
Creating :  increment-price-mv3sec
Creating :  increment-price-mv4sec
Creating :  increment-price-mv5sec
Creating :  increment-price-mv6sec
Creating :  increment-price-mv7sec
Creating :  increment-price-mv8sec
Creating :  increment-price-mv9sec
Creating :  increment-price-mv10sec
Creating :  increment-price-mv11sec
Creating :  increment-price-mv12sec
Creating :  increment-price-mv13sec
Creating :  increment-price-mv14sec
Creating :  increment-price-mv15sec
Total records processed :  1891

In [18]:
# df_history_ts_process[1768:]

In [19]:
plt.figure()
plt.plot(df_history_ts_process['increment-price'][1768:])
plt.plot(df_history_ts_process['increment-price-prev3sec'][1768:])
plt.plot(df_history_ts_process['increment-price-prev7sec'][1768:])
plt.plot(df_history_ts_process['increment-price-prev11sec'][1768:])
plt.plot(df_history_ts_process['increment-price-prev15sec'][1768:])
plt.plot()


Out[19]:
[]
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

In [20]:
plt.figure()
plt.plot(df_history_ts_process['increment-price'][1768:])
plt.plot(df_history_ts_process['increment-price-mv3sec'][1768:])
plt.plot(df_history_ts_process['increment-price-mv7sec'][1768:])
plt.plot(df_history_ts_process['increment-price-mv11sec'][1768:])
plt.plot(df_history_ts_process['increment-price-mv15sec'][1768:])
plt.plot()


Out[20]:
[]
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

In [ ]:

Process: df_history_table_process


In [21]:
df_history_table_process.tail()


Out[21]:
ccyy-mm volume-plate deal-price-low deal-price-avg deal-early-second volume-bidder
26 2017-03 10356 87800 87916 55 262010
27 2017-04 12196 89800 89850 59 252273
28 2017-05 10316 90100 90209 55 270197
29 2017-06 10312 89400 89532 45 244349
30 2017-07 10325 92200 92250 57 269189

In [22]:
# date of current month
df_history_table_process['date-curr'] = df_history_table_process.apply(lambda row: pd.to_datetime(row['ccyy-mm']), axis=1)
df_history_table_process['d-avg-low-price'] = df_history_table_process.apply(lambda row: row['deal-price-avg'] - row['deal-price-low'], axis=1)
df_history_table_process['ratio-bid'] = df_history_table_process.apply(lambda row: row['volume-plate'] / row['volume-bidder'], axis=1)

In [23]:
df_history_table_process.tail()


Out[23]:
ccyy-mm volume-plate deal-price-low deal-price-avg deal-early-second volume-bidder date-curr d-avg-low-price ratio-bid
26 2017-03 10356 87800 87916 55 262010 2017-03-01 116 0.039525
27 2017-04 12196 89800 89850 59 252273 2017-04-01 50 0.048344
28 2017-05 10316 90100 90209 55 270197 2017-05-01 109 0.038180
29 2017-06 10312 89400 89532 45 244349 2017-06-01 132 0.042202
30 2017-07 10325 92200 92250 57 269189 2017-07-01 50 0.038356

Merge dataframe


In [24]:
df_history_ts_process_tmp2 = df_history_ts_process.copy()

In [25]:
df_history_ts_process = df_history_ts_process_tmp2.copy()

In [26]:
# look up current month table data: 'volume-plate', 'ratio-bid'
df_history_ts_process = pd.merge(df_history_ts_process, df_history_table_process[['date-curr', 'volume-plate', 'ratio-bid']], how = 'left', left_on = 'date-curr', right_on = 'date-curr', suffixes=['', '_table'])

In [27]:
for i in range(0, len(df_history_ts_process.columns)): print(df_history_ts_process.columns[i])


ccyy-mm
time
bid-price
date-curr
date-prev
year
month
hour
minute
second
datetime-curr
datetime-prev
base-price15sec
increment-price
increment-price-target
increment-price-prev1sec
increment-price-prev2sec
increment-price-prev3sec
increment-price-prev4sec
increment-price-prev5sec
increment-price-prev6sec
increment-price-prev7sec
increment-price-prev8sec
increment-price-prev9sec
increment-price-prev10sec
increment-price-prev11sec
increment-price-prev12sec
increment-price-prev13sec
increment-price-prev14sec
increment-price-prev15sec
increment-price-mv2sec
increment-price-mv3sec
increment-price-mv4sec
increment-price-mv5sec
increment-price-mv6sec
increment-price-mv7sec
increment-price-mv8sec
increment-price-mv9sec
increment-price-mv10sec
increment-price-mv11sec
increment-price-mv12sec
increment-price-mv13sec
increment-price-mv14sec
increment-price-mv15sec
volume-plate
ratio-bid

In [28]:
# look up pevious month table data: 'volume-plate', 'ratio-bid', 'deal-early-second', 'deal-price-avg', 'd-avg-low-price'
df_history_ts_process = pd.merge(df_history_ts_process, df_history_table_process[['date-curr', 'volume-plate', 'ratio-bid', 'deal-early-second', 'deal-price-avg', 'd-avg-low-price']], how = 'left', left_on = 'date-prev', right_on = 'date-curr', suffixes=['', '_m0'])

In [31]:
df_history_ts_process['d-increment-avg-low-price_m0'] = df_history_ts_process.apply(lambda row: row['increment-price'] - row['d-avg-low-price'], axis=1)

In [32]:
for i in range(0, len(df_history_ts_process.columns)): print(df_history_ts_process.columns[i])


ccyy-mm
time
bid-price
date-curr
date-prev
year
month
hour
minute
second
datetime-curr
datetime-prev
base-price15sec
increment-price
increment-price-target
increment-price-prev1sec
increment-price-prev2sec
increment-price-prev3sec
increment-price-prev4sec
increment-price-prev5sec
increment-price-prev6sec
increment-price-prev7sec
increment-price-prev8sec
increment-price-prev9sec
increment-price-prev10sec
increment-price-prev11sec
increment-price-prev12sec
increment-price-prev13sec
increment-price-prev14sec
increment-price-prev15sec
increment-price-mv2sec
increment-price-mv3sec
increment-price-mv4sec
increment-price-mv5sec
increment-price-mv6sec
increment-price-mv7sec
increment-price-mv8sec
increment-price-mv9sec
increment-price-mv10sec
increment-price-mv11sec
increment-price-mv12sec
increment-price-mv13sec
increment-price-mv14sec
increment-price-mv15sec
volume-plate
ratio-bid
date-curr_m0
volume-plate_m0
ratio-bid_m0
deal-early-second
deal-price-avg
d-avg-low-price
d-increment-avg-low-price_m0

Shift to copy previous 'parm_calculate_prev_month' month's data into current row


In [30]:
# df_history_ts_process = df_history_ts_process_lookup.copy()

In [33]:
df_history_ts_process_lookup = df_history_ts_process.copy()
df_history_ts_process_lookup.tail()


Out[33]:
ccyy-mm time bid-price date-curr date-prev year month hour minute second ... increment-price-mv15sec volume-plate ratio-bid date-curr_m0 volume-plate_m0 ratio-bid_m0 deal-early-second deal-price-avg d-avg-low-price d-increment-avg-low-price_m0
1886 2017-07 11:29:56 92100 2017-07-01 2017-06-01 2017 07 11 29 56 ... 800 10325 0.038356 2017-06-01 10312.0 0.042202 45.0 89532.0 132.0 1368.0
1887 2017-07 11:29:57 92100 2017-07-01 2017-06-01 2017 07 11 29 57 ... 873.333 10325 0.038356 2017-06-01 10312.0 0.042202 45.0 89532.0 132.0 1368.0
1888 2017-07 11:29:58 92100 2017-07-01 2017-06-01 2017 07 11 29 58 ... 946.667 10325 0.038356 2017-06-01 10312.0 0.042202 45.0 89532.0 132.0 1368.0
1889 2017-07 11:29:59 92200 2017-07-01 2017-06-01 2017 07 11 29 59 ... 1020 10325 0.038356 2017-06-01 10312.0 0.042202 45.0 89532.0 132.0 1468.0
1890 2017-07 11:30:00 92200 2017-07-01 2017-06-01 2017 07 11 30 00 ... 1093.33 10325 0.038356 2017-06-01 10312.0 0.042202 45.0 89532.0 132.0 1468.0

5 rows × 53 columns


In [34]:
# _m1
df_history_ts_process = pd.merge(df_history_ts_process, df_history_ts_process_lookup[[ \
        'datetime-curr', 'datetime-prev', 
        'base-price15sec', 'increment-price', 'increment-price-target',
        'increment-price-prev1sec', 'increment-price-prev2sec',
        'increment-price-prev3sec', 'increment-price-prev4sec',
        'increment-price-prev5sec', 'increment-price-prev6sec',
        'increment-price-prev7sec', 'increment-price-prev8sec',
        'increment-price-prev9sec', 'increment-price-prev10sec',
        'increment-price-prev11sec', 'increment-price-prev12sec',
        'increment-price-prev13sec', 'increment-price-prev14sec',
        'increment-price-prev15sec', 
        'increment-price-mv2sec',
        'increment-price-mv3sec', 'increment-price-mv4sec',
        'increment-price-mv5sec', 'increment-price-mv6sec',
        'increment-price-mv7sec', 'increment-price-mv8sec',
        'increment-price-mv9sec', 'increment-price-mv10sec',
        'increment-price-mv11sec', 'increment-price-mv12sec',
        'increment-price-mv13sec', 'increment-price-mv14sec',
        'increment-price-mv15sec', 
        'volume-plate_m0', 
        'ratio-bid_m0', 
        'deal-early-second',
        'deal-price-avg',
        'd-avg-low-price',
        'd-increment-avg-low-price_m0'
        ]], how = 'left', left_on = 'datetime-prev', right_on = 'datetime-curr', suffixes=['', '_m1'])
df_history_ts_process.tail()


Out[34]:
ccyy-mm time bid-price date-curr date-prev year month hour minute second ... increment-price-mv12sec_m1 increment-price-mv13sec_m1 increment-price-mv14sec_m1 increment-price-mv15sec_m1 volume-plate_m0_m1 ratio-bid_m0_m1 deal-early-second_m1 deal-price-avg_m1 d-avg-low-price_m1 d-increment-avg-low-price_m0_m1
1886 2017-07 11:29:56 92100 2017-07-01 2017-06-01 2017 07 11 29 56 ... 616.667 607.692 592.857 580 10316.0 0.03818 55.0 90209.0 109.0 591.0
1887 2017-07 11:29:57 92100 2017-07-01 2017-06-01 2017 07 11 29 57 ... 633.333 623.077 614.286 600 10316.0 0.03818 55.0 90209.0 109.0 691.0
1888 2017-07 11:29:58 92100 2017-07-01 2017-06-01 2017 07 11 29 58 ... 650 646.154 635.714 626.667 10316.0 0.03818 55.0 90209.0 109.0 691.0
1889 2017-07 11:29:59 92200 2017-07-01 2017-06-01 2017 07 11 29 59 ... 666.667 661.538 657.143 646.667 10316.0 0.03818 55.0 90209.0 109.0 691.0
1890 2017-07 11:30:00 92200 2017-07-01 2017-06-01 2017 07 11 30 00 ... 683.333 676.923 671.429 666.667 10316.0 0.03818 55.0 90209.0 109.0 791.0

5 rows × 93 columns


In [35]:
# _m2
df_history_ts_process = pd.merge(df_history_ts_process, df_history_ts_process_lookup[[ \
        'datetime-curr', 'datetime-prev', 
        'base-price15sec', 'increment-price', 'increment-price-target',
        'increment-price-prev1sec', 'increment-price-prev2sec',
        'increment-price-prev3sec', 'increment-price-prev4sec',
        'increment-price-prev5sec', 'increment-price-prev6sec',
        'increment-price-prev7sec', 'increment-price-prev8sec',
        'increment-price-prev9sec', 'increment-price-prev10sec',
        'increment-price-prev11sec', 'increment-price-prev12sec',
        'increment-price-prev13sec', 'increment-price-prev14sec',
        'increment-price-prev15sec', 
        'increment-price-mv2sec',
        'increment-price-mv3sec', 'increment-price-mv4sec',
        'increment-price-mv5sec', 'increment-price-mv6sec',
        'increment-price-mv7sec', 'increment-price-mv8sec',
        'increment-price-mv9sec', 'increment-price-mv10sec',
        'increment-price-mv11sec', 'increment-price-mv12sec',
        'increment-price-mv13sec', 'increment-price-mv14sec',
        'increment-price-mv15sec', 
        'volume-plate_m0', 
        'ratio-bid_m0', 
        'deal-early-second',
        'deal-price-avg',
        'd-avg-low-price',
        'd-increment-avg-low-price_m0'                                                                                   
        ]], how = 'left', left_on = 'datetime-prev_m1', right_on = 'datetime-curr', suffixes=['', '_m2'])
df_history_ts_process.tail()


Out[35]:
ccyy-mm time bid-price date-curr date-prev year month hour minute second ... increment-price-mv12sec_m2 increment-price-mv13sec_m2 increment-price-mv14sec_m2 increment-price-mv15sec_m2 volume-plate_m0_m2 ratio-bid_m0_m2 deal-early-second_m2 deal-price-avg_m2 d-avg-low-price_m2 d-increment-avg-low-price_m0_m2
1886 2017-07 11:29:56 92100 2017-07-01 2017-06-01 2017 07 11 29 56 ... 658.333 630.769 607.143 586.667 12196.0 0.048344 59.0 89850.0 50.0 1050.0
1887 2017-07 11:29:57 92100 2017-07-01 2017-06-01 2017 07 11 29 57 ... 716.667 692.308 664.286 640 12196.0 0.048344 59.0 89850.0 50.0 1150.0
1888 2017-07 11:29:58 92100 2017-07-01 2017-06-01 2017 07 11 29 58 ... 783.333 753.846 728.571 700 12196.0 0.048344 59.0 89850.0 50.0 1250.0
1889 2017-07 11:29:59 92200 2017-07-01 2017-06-01 2017 07 11 29 59 ... 850 823.077 792.857 766.667 12196.0 0.048344 59.0 89850.0 50.0 1250.0
1890 2017-07 11:30:00 92200 2017-07-01 2017-06-01 2017 07 11 30 00 ... 916.667 884.615 857.143 826.667 12196.0 0.048344 59.0 89850.0 50.0 1250.0

5 rows × 133 columns


In [36]:
# _m3
df_history_ts_process = pd.merge(df_history_ts_process, df_history_ts_process_lookup[[ \
        'datetime-curr', 'datetime-prev', 
        'base-price15sec', 'increment-price', 'increment-price-target',
        'increment-price-prev1sec', 'increment-price-prev2sec',
        'increment-price-prev3sec', 'increment-price-prev4sec',
        'increment-price-prev5sec', 'increment-price-prev6sec',
        'increment-price-prev7sec', 'increment-price-prev8sec',
        'increment-price-prev9sec', 'increment-price-prev10sec',
        'increment-price-prev11sec', 'increment-price-prev12sec',
        'increment-price-prev13sec', 'increment-price-prev14sec',
        'increment-price-prev15sec', 
        'increment-price-mv2sec',
        'increment-price-mv3sec', 'increment-price-mv4sec',
        'increment-price-mv5sec', 'increment-price-mv6sec',
        'increment-price-mv7sec', 'increment-price-mv8sec',
        'increment-price-mv9sec', 'increment-price-mv10sec',
        'increment-price-mv11sec', 'increment-price-mv12sec',
        'increment-price-mv13sec', 'increment-price-mv14sec',
        'increment-price-mv15sec', 
        'volume-plate_m0', 
        'ratio-bid_m0', 
        'deal-early-second',
        'deal-price-avg',
        'd-avg-low-price',
        'd-increment-avg-low-price_m0'                                                                                  
        ]], how = 'left', left_on = 'datetime-prev_m2', right_on = 'datetime-curr', suffixes=['', '_m3'])
df_history_ts_process.tail()


Out[36]:
ccyy-mm time bid-price date-curr date-prev year month hour minute second ... increment-price-mv12sec_m3 increment-price-mv13sec_m3 increment-price-mv14sec_m3 increment-price-mv15sec_m3 volume-plate_m0_m3 ratio-bid_m0_m3 deal-early-second_m3 deal-price-avg_m3 d-avg-low-price_m3 d-increment-avg-low-price_m0_m3
1886 2017-07 11:29:56 92100 2017-07-01 2017-06-01 2017 07 11 29 56 ... 691.667 669.231 650 626.667 10356.0 0.039525 55.0 87916.0 116.0 784.0
1887 2017-07 11:29:57 92100 2017-07-01 2017-06-01 2017 07 11 29 57 ... 725 707.692 685.714 666.667 10356.0 0.039525 55.0 87916.0 116.0 884.0
1888 2017-07 11:29:58 92100 2017-07-01 2017-06-01 2017 07 11 29 58 ... 758.333 746.154 728.571 706.667 10356.0 0.039525 55.0 87916.0 116.0 884.0
1889 2017-07 11:29:59 92200 2017-07-01 2017-06-01 2017 07 11 29 59 ... 791.667 776.923 764.286 746.667 10356.0 0.039525 55.0 87916.0 116.0 984.0
1890 2017-07 11:30:00 92200 2017-07-01 2017-06-01 2017 07 11 30 00 ... 833.333 815.385 800 786.667 10356.0 0.039525 55.0 87916.0 116.0 1184.0

5 rows × 173 columns


In [37]:
plt.figure()
plt.plot(df_history_ts_process['increment-price-mv10sec'][1768:])
plt.plot(df_history_ts_process['increment-price-mv10sec_m1'][1768:])
plt.plot(df_history_ts_process['increment-price-mv10sec_m2'][1768:])
plt.plot(df_history_ts_process['increment-price-mv10sec_m3'][1768:])
plt.figure()
plt.plot(df_history_ts_process['increment-price-prev10sec'][1768:])
plt.plot(df_history_ts_process['increment-price-prev10sec_m1'][1768:])
plt.plot(df_history_ts_process['increment-price-prev10sec_m2'][1768:])
plt.plot(df_history_ts_process['increment-price-prev10sec_m3'][1768:])
plt.figure()
plt.plot(df_history_ts_process['increment-price'][1768:])
plt.plot(df_history_ts_process['increment-price_m1'][1768:])
plt.plot(df_history_ts_process['increment-price_m2'][1768:])
plt.plot(df_history_ts_process['increment-price_m3'][1768:])
plt.figure()
plt.plot(df_history_ts_process['increment-price-target'][1768:])
plt.plot(df_history_ts_process['increment-price-target_m1'][1768:])
plt.plot(df_history_ts_process['increment-price-target_m2'][1768:])
plt.plot(df_history_ts_process['increment-price-target_m3'][1768:])

plt.plot()


Out[37]:
[]
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

In [ ]:

Housekeeping to remove some invald data during pre-processing


In [38]:
for i in range(0, len(df_history_ts_process.columns)): print(df_history_ts_process.columns[i])


ccyy-mm
time
bid-price
date-curr
date-prev
year
month
hour
minute
second
datetime-curr
datetime-prev
base-price15sec
increment-price
increment-price-target
increment-price-prev1sec
increment-price-prev2sec
increment-price-prev3sec
increment-price-prev4sec
increment-price-prev5sec
increment-price-prev6sec
increment-price-prev7sec
increment-price-prev8sec
increment-price-prev9sec
increment-price-prev10sec
increment-price-prev11sec
increment-price-prev12sec
increment-price-prev13sec
increment-price-prev14sec
increment-price-prev15sec
increment-price-mv2sec
increment-price-mv3sec
increment-price-mv4sec
increment-price-mv5sec
increment-price-mv6sec
increment-price-mv7sec
increment-price-mv8sec
increment-price-mv9sec
increment-price-mv10sec
increment-price-mv11sec
increment-price-mv12sec
increment-price-mv13sec
increment-price-mv14sec
increment-price-mv15sec
volume-plate
ratio-bid
date-curr_m0
volume-plate_m0
ratio-bid_m0
deal-early-second
deal-price-avg
d-avg-low-price
d-increment-avg-low-price_m0
datetime-curr_m1
datetime-prev_m1
base-price15sec_m1
increment-price_m1
increment-price-target_m1
increment-price-prev1sec_m1
increment-price-prev2sec_m1
increment-price-prev3sec_m1
increment-price-prev4sec_m1
increment-price-prev5sec_m1
increment-price-prev6sec_m1
increment-price-prev7sec_m1
increment-price-prev8sec_m1
increment-price-prev9sec_m1
increment-price-prev10sec_m1
increment-price-prev11sec_m1
increment-price-prev12sec_m1
increment-price-prev13sec_m1
increment-price-prev14sec_m1
increment-price-prev15sec_m1
increment-price-mv2sec_m1
increment-price-mv3sec_m1
increment-price-mv4sec_m1
increment-price-mv5sec_m1
increment-price-mv6sec_m1
increment-price-mv7sec_m1
increment-price-mv8sec_m1
increment-price-mv9sec_m1
increment-price-mv10sec_m1
increment-price-mv11sec_m1
increment-price-mv12sec_m1
increment-price-mv13sec_m1
increment-price-mv14sec_m1
increment-price-mv15sec_m1
volume-plate_m0_m1
ratio-bid_m0_m1
deal-early-second_m1
deal-price-avg_m1
d-avg-low-price_m1
d-increment-avg-low-price_m0_m1
datetime-curr_m2
datetime-prev_m2
base-price15sec_m2
increment-price_m2
increment-price-target_m2
increment-price-prev1sec_m2
increment-price-prev2sec_m2
increment-price-prev3sec_m2
increment-price-prev4sec_m2
increment-price-prev5sec_m2
increment-price-prev6sec_m2
increment-price-prev7sec_m2
increment-price-prev8sec_m2
increment-price-prev9sec_m2
increment-price-prev10sec_m2
increment-price-prev11sec_m2
increment-price-prev12sec_m2
increment-price-prev13sec_m2
increment-price-prev14sec_m2
increment-price-prev15sec_m2
increment-price-mv2sec_m2
increment-price-mv3sec_m2
increment-price-mv4sec_m2
increment-price-mv5sec_m2
increment-price-mv6sec_m2
increment-price-mv7sec_m2
increment-price-mv8sec_m2
increment-price-mv9sec_m2
increment-price-mv10sec_m2
increment-price-mv11sec_m2
increment-price-mv12sec_m2
increment-price-mv13sec_m2
increment-price-mv14sec_m2
increment-price-mv15sec_m2
volume-plate_m0_m2
ratio-bid_m0_m2
deal-early-second_m2
deal-price-avg_m2
d-avg-low-price_m2
d-increment-avg-low-price_m0_m2
datetime-curr_m3
datetime-prev_m3
base-price15sec_m3
increment-price_m3
increment-price-target_m3
increment-price-prev1sec_m3
increment-price-prev2sec_m3
increment-price-prev3sec_m3
increment-price-prev4sec_m3
increment-price-prev5sec_m3
increment-price-prev6sec_m3
increment-price-prev7sec_m3
increment-price-prev8sec_m3
increment-price-prev9sec_m3
increment-price-prev10sec_m3
increment-price-prev11sec_m3
increment-price-prev12sec_m3
increment-price-prev13sec_m3
increment-price-prev14sec_m3
increment-price-prev15sec_m3
increment-price-mv2sec_m3
increment-price-mv3sec_m3
increment-price-mv4sec_m3
increment-price-mv5sec_m3
increment-price-mv6sec_m3
increment-price-mv7sec_m3
increment-price-mv8sec_m3
increment-price-mv9sec_m3
increment-price-mv10sec_m3
increment-price-mv11sec_m3
increment-price-mv12sec_m3
increment-price-mv13sec_m3
increment-price-mv14sec_m3
increment-price-mv15sec_m3
volume-plate_m0_m3
ratio-bid_m0_m3
deal-early-second_m3
deal-price-avg_m3
d-avg-low-price_m3
d-increment-avg-low-price_m0_m3

In [39]:
# housekeeping: delete some columns
# df_history_ts_process.drop('date-curr_y', axis=1, inplace=True)

In [40]:
parm_record_cut_ccyy


Out[40]:
Timestamp('2015-04-01 00:00:00')

In [41]:
# remove first 'parm_record_cut_ccyy' months from dataset
df_history_ts_process = df_history_ts_process[df_history_ts_process['date-curr'] > parm_record_cut_ccyy]

In [42]:
# total 61 seconds/rows per month:
# remove first 'parm_record_cut_row_head' reconds
# remove last 'parm_record_cut_row_tail' reconds
df_history_ts_process = df_history_ts_process[df_history_ts_process['second'] >= str(parm_record_cut_row_head) ]
df_history_ts_process = df_history_ts_process[df_history_ts_process['second'] <= str(60 - parm_record_cut_row_tail) ]
# df_history_ts_process = df_history_ts_process[df_history_ts_process['second'] > parm_record_cut_row_head ]

In [43]:
# Reset index after housekeeping
df_history_ts_process = df_history_ts_process.reset_index(drop=True)

In [44]:
df_history_ts_process.head()


Out[44]:
ccyy-mm time bid-price date-curr date-prev year month hour minute second ... increment-price-mv12sec_m3 increment-price-mv13sec_m3 increment-price-mv14sec_m3 increment-price-mv15sec_m3 volume-plate_m0_m3 ratio-bid_m0_m3 deal-early-second_m3 deal-price-avg_m3 d-avg-low-price_m3 d-increment-avg-low-price_m0_m3
0 2015-05 11:29:15 78400 2015-05-01 2015-04-01 2015 05 11 29 15 ... 0 0 0 -6.66667 7990.0 0.081362 48.0 74216.0 216.0 -216.0
1 2015-05 11:29:16 78400 2015-05-01 2015-04-01 2015 05 11 29 16 ... 0 0 0 0 7990.0 0.081362 48.0 74216.0 216.0 -216.0
2 2015-05 11:29:17 78400 2015-05-01 2015-04-01 2015 05 11 29 17 ... 0 0 0 0 7990.0 0.081362 48.0 74216.0 216.0 -216.0
3 2015-05 11:29:18 78400 2015-05-01 2015-04-01 2015 05 11 29 18 ... 0 0 0 0 7990.0 0.081362 48.0 74216.0 216.0 -216.0
4 2015-05 11:29:19 78500 2015-05-01 2015-04-01 2015 05 11 29 19 ... 0 0 0 0 7990.0 0.081362 48.0 74216.0 216.0 -216.0

5 rows × 173 columns


In [45]:
df_history_ts_process.tail()


Out[45]:
ccyy-mm time bid-price date-curr date-prev year month hour minute second ... increment-price-mv12sec_m3 increment-price-mv13sec_m3 increment-price-mv14sec_m3 increment-price-mv15sec_m3 volume-plate_m0_m3 ratio-bid_m0_m3 deal-early-second_m3 deal-price-avg_m3 d-avg-low-price_m3 d-increment-avg-low-price_m0_m3
1048 2017-07 11:29:49 91400 2017-07-01 2017-06-01 2017 07 11 29 49 ... 441.667 430.769 421.429 413.333 10356.0 0.039525 55.0 87916.0 116.0 584.0
1049 2017-07 11:29:50 91500 2017-07-01 2017-06-01 2017 07 11 29 50 ... 475 461.538 450 440 10356.0 0.039525 55.0 87916.0 116.0 584.0
1050 2017-07 11:29:51 91600 2017-07-01 2017-06-01 2017 07 11 29 51 ... 508.333 492.308 478.571 466.667 10356.0 0.039525 55.0 87916.0 116.0 584.0
1051 2017-07 11:29:52 91700 2017-07-01 2017-06-01 2017 07 11 29 52 ... 541.667 523.077 507.143 493.333 10356.0 0.039525 55.0 87916.0 116.0 584.0
1052 2017-07 11:29:53 91800 2017-07-01 2017-06-01 2017 07 11 29 53 ... 575 553.846 535.714 520 10356.0 0.039525 55.0 87916.0 116.0 684.0

5 rows × 173 columns


In [46]:
plt.figure()
plt.plot(df_history_ts_process['increment-price'][974:])
plt.plot(df_history_ts_process['increment-price-mv3sec'][974:])
plt.plot(df_history_ts_process['increment-price-mv7sec'][974:])
plt.plot(df_history_ts_process['increment-price-mv11sec'][974:])
plt.plot(df_history_ts_process['increment-price-mv15sec'][974:])
plt.figure()
plt.plot(df_history_ts_process['increment-price-mv15sec'][974:])
plt.plot(df_history_ts_process['increment-price-mv15sec_m1'][974:])
plt.plot(df_history_ts_process['increment-price-mv15sec_m2'][974:])
plt.plot(df_history_ts_process['increment-price-mv15sec_m3'][974:])
plt.plot()


Out[46]:
[]
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

In [ ]:

[3] Modeling Part 2: Python scikit-learn

Models to use:

  • GradientBoostingClassifier
  • RandomForestClassifier
  • AdaBoostClassifier
  • ExtraTreesClassifier
  • BaggingClassifier
  • LogisticRegression
  • SVM kernal RBF
  • SVM kernal Linear
  • KNeighborsClassifier

Import pre-processed data


In [48]:
# plt.plot(df_history_ts_process['d-avg-low-price'])
# plt.figure()
# plt.figure()
# plt.plot(df_history_ts_process['d-avg-low-price_m1'])
# plt.figure()
# plt.plot(df_history_ts_process['d-avg-low-price_m2'])
# plt.figure()
# plt.plot(df_history_ts_process['d-avg-low-price_m3'])

In [49]:
for i in range(0, len(df_history_ts_process.columns)): print(df_history_ts_process.columns[i])


ccyy-mm
time
bid-price
date-curr
date-prev
year
month
hour
minute
second
datetime-curr
datetime-prev
base-price15sec
increment-price
increment-price-target
increment-price-prev1sec
increment-price-prev2sec
increment-price-prev3sec
increment-price-prev4sec
increment-price-prev5sec
increment-price-prev6sec
increment-price-prev7sec
increment-price-prev8sec
increment-price-prev9sec
increment-price-prev10sec
increment-price-prev11sec
increment-price-prev12sec
increment-price-prev13sec
increment-price-prev14sec
increment-price-prev15sec
increment-price-mv2sec
increment-price-mv3sec
increment-price-mv4sec
increment-price-mv5sec
increment-price-mv6sec
increment-price-mv7sec
increment-price-mv8sec
increment-price-mv9sec
increment-price-mv10sec
increment-price-mv11sec
increment-price-mv12sec
increment-price-mv13sec
increment-price-mv14sec
increment-price-mv15sec
volume-plate
ratio-bid
date-curr_m0
volume-plate_m0
ratio-bid_m0
deal-early-second
deal-price-avg
d-avg-low-price
d-increment-avg-low-price_m0
datetime-curr_m1
datetime-prev_m1
base-price15sec_m1
increment-price_m1
increment-price-target_m1
increment-price-prev1sec_m1
increment-price-prev2sec_m1
increment-price-prev3sec_m1
increment-price-prev4sec_m1
increment-price-prev5sec_m1
increment-price-prev6sec_m1
increment-price-prev7sec_m1
increment-price-prev8sec_m1
increment-price-prev9sec_m1
increment-price-prev10sec_m1
increment-price-prev11sec_m1
increment-price-prev12sec_m1
increment-price-prev13sec_m1
increment-price-prev14sec_m1
increment-price-prev15sec_m1
increment-price-mv2sec_m1
increment-price-mv3sec_m1
increment-price-mv4sec_m1
increment-price-mv5sec_m1
increment-price-mv6sec_m1
increment-price-mv7sec_m1
increment-price-mv8sec_m1
increment-price-mv9sec_m1
increment-price-mv10sec_m1
increment-price-mv11sec_m1
increment-price-mv12sec_m1
increment-price-mv13sec_m1
increment-price-mv14sec_m1
increment-price-mv15sec_m1
volume-plate_m0_m1
ratio-bid_m0_m1
deal-early-second_m1
deal-price-avg_m1
d-avg-low-price_m1
d-increment-avg-low-price_m0_m1
datetime-curr_m2
datetime-prev_m2
base-price15sec_m2
increment-price_m2
increment-price-target_m2
increment-price-prev1sec_m2
increment-price-prev2sec_m2
increment-price-prev3sec_m2
increment-price-prev4sec_m2
increment-price-prev5sec_m2
increment-price-prev6sec_m2
increment-price-prev7sec_m2
increment-price-prev8sec_m2
increment-price-prev9sec_m2
increment-price-prev10sec_m2
increment-price-prev11sec_m2
increment-price-prev12sec_m2
increment-price-prev13sec_m2
increment-price-prev14sec_m2
increment-price-prev15sec_m2
increment-price-mv2sec_m2
increment-price-mv3sec_m2
increment-price-mv4sec_m2
increment-price-mv5sec_m2
increment-price-mv6sec_m2
increment-price-mv7sec_m2
increment-price-mv8sec_m2
increment-price-mv9sec_m2
increment-price-mv10sec_m2
increment-price-mv11sec_m2
increment-price-mv12sec_m2
increment-price-mv13sec_m2
increment-price-mv14sec_m2
increment-price-mv15sec_m2
volume-plate_m0_m2
ratio-bid_m0_m2
deal-early-second_m2
deal-price-avg_m2
d-avg-low-price_m2
d-increment-avg-low-price_m0_m2
datetime-curr_m3
datetime-prev_m3
base-price15sec_m3
increment-price_m3
increment-price-target_m3
increment-price-prev1sec_m3
increment-price-prev2sec_m3
increment-price-prev3sec_m3
increment-price-prev4sec_m3
increment-price-prev5sec_m3
increment-price-prev6sec_m3
increment-price-prev7sec_m3
increment-price-prev8sec_m3
increment-price-prev9sec_m3
increment-price-prev10sec_m3
increment-price-prev11sec_m3
increment-price-prev12sec_m3
increment-price-prev13sec_m3
increment-price-prev14sec_m3
increment-price-prev15sec_m3
increment-price-mv2sec_m3
increment-price-mv3sec_m3
increment-price-mv4sec_m3
increment-price-mv5sec_m3
increment-price-mv6sec_m3
increment-price-mv7sec_m3
increment-price-mv8sec_m3
increment-price-mv9sec_m3
increment-price-mv10sec_m3
increment-price-mv11sec_m3
increment-price-mv12sec_m3
increment-price-mv13sec_m3
increment-price-mv14sec_m3
increment-price-mv15sec_m3
volume-plate_m0_m3
ratio-bid_m0_m3
deal-early-second_m3
deal-price-avg_m3
d-avg-low-price_m3
d-increment-avg-low-price_m0_m3

In [50]:
X = df_history_ts_process[[
#          ,'ccyy-mm'
#         ,'time'
#         ,'bid-price'
#         ,'date-curr'
#         ,'date-prev'
#         ,'year'
         'month'
#         ,'hour'
#         ,'minute'
        ,'second'
#         ,'datetime-curr'
#         ,'datetime-prev'
        ,'base-price15sec'
        ,'increment-price'
#         ,'increment-price-target'   # <<<<<<< This is target 
        ,'increment-price-prev1sec'
        ,'increment-price-prev2sec'
        ,'increment-price-prev3sec'
        ,'increment-price-prev4sec'
        ,'increment-price-prev5sec'
        ,'increment-price-prev6sec'
        ,'increment-price-prev7sec'
        ,'increment-price-prev8sec'
        ,'increment-price-prev9sec'
        ,'increment-price-prev10sec'
        ,'increment-price-prev11sec'
        ,'increment-price-prev12sec'
        ,'increment-price-prev13sec'
        ,'increment-price-prev14sec'
        ,'increment-price-prev15sec'
        ,'increment-price-mv2sec'
        ,'increment-price-mv3sec'
        ,'increment-price-mv4sec'
        ,'increment-price-mv5sec'
        ,'increment-price-mv6sec'
        ,'increment-price-mv7sec'
        ,'increment-price-mv8sec'
        ,'increment-price-mv9sec'
        ,'increment-price-mv10sec'
        ,'increment-price-mv11sec'
        ,'increment-price-mv12sec'
        ,'increment-price-mv13sec'
        ,'increment-price-mv14sec'
        ,'increment-price-mv15sec'
        ,'volume-plate'
        ,'ratio-bid'
#         ,'date-curr_m0'
        ,'volume-plate_m0'
        ,'ratio-bid_m0'
        ,'deal-early-second'
        ,'deal-price-avg'
        ,'d-avg-low-price'
        ,'d-increment-avg-low-price_m0'
    
#         ,'datetime-curr_m1'
#         ,'datetime-prev_m1'
        ,'base-price15sec_m1'
        ,'increment-price_m1'
        ,'increment-price-target_m1'
        ,'increment-price-prev1sec_m1'
        ,'increment-price-prev2sec_m1'
        ,'increment-price-prev3sec_m1'
        ,'increment-price-prev4sec_m1'
        ,'increment-price-prev5sec_m1'
        ,'increment-price-prev6sec_m1'
        ,'increment-price-prev7sec_m1'
        ,'increment-price-prev8sec_m1'
        ,'increment-price-prev9sec_m1'
        ,'increment-price-prev10sec_m1'
        ,'increment-price-prev11sec_m1'
        ,'increment-price-prev12sec_m1'
        ,'increment-price-prev13sec_m1'
        ,'increment-price-prev14sec_m1'
        ,'increment-price-prev15sec_m1'
        ,'increment-price-mv2sec_m1'
        ,'increment-price-mv3sec_m1'
        ,'increment-price-mv4sec_m1'
        ,'increment-price-mv5sec_m1'
        ,'increment-price-mv6sec_m1'
        ,'increment-price-mv7sec_m1'
        ,'increment-price-mv8sec_m1'
        ,'increment-price-mv9sec_m1'
        ,'increment-price-mv10sec_m1'
        ,'increment-price-mv11sec_m1'
        ,'increment-price-mv12sec_m1'
        ,'increment-price-mv13sec_m1'
        ,'increment-price-mv14sec_m1'
        ,'increment-price-mv15sec_m1'
        ,'volume-plate_m0_m1'
        ,'ratio-bid_m0_m1'
        ,'deal-early-second_m1'
        ,'deal-price-avg_m1'
        ,'d-avg-low-price_m1'
        ,'d-increment-avg-low-price_m0_m1'

#         ,'datetime-curr_m2'
#         ,'datetime-prev_m2'
        ,'base-price15sec_m2'
        ,'increment-price_m2'
        ,'increment-price-target_m2'
        ,'increment-price-prev1sec_m2'
        ,'increment-price-prev2sec_m2'
        ,'increment-price-prev3sec_m2'
        ,'increment-price-prev4sec_m2'
        ,'increment-price-prev5sec_m2'
        ,'increment-price-prev6sec_m2'
        ,'increment-price-prev7sec_m2'
        ,'increment-price-prev8sec_m2'
        ,'increment-price-prev9sec_m2'
        ,'increment-price-prev10sec_m2'
        ,'increment-price-prev11sec_m2'
        ,'increment-price-prev12sec_m2'
        ,'increment-price-prev13sec_m2'
        ,'increment-price-prev14sec_m2'
        ,'increment-price-prev15sec_m2'
        ,'increment-price-mv2sec_m2'
        ,'increment-price-mv3sec_m2'
        ,'increment-price-mv4sec_m2'
        ,'increment-price-mv5sec_m2'
        ,'increment-price-mv6sec_m2'
        ,'increment-price-mv7sec_m2'
        ,'increment-price-mv8sec_m2'
        ,'increment-price-mv9sec_m2'
        ,'increment-price-mv10sec_m2'
        ,'increment-price-mv11sec_m2'
        ,'increment-price-mv12sec_m2'
        ,'increment-price-mv13sec_m2'
        ,'increment-price-mv14sec_m2'
        ,'increment-price-mv15sec_m2'
        ,'volume-plate_m0_m2'
        ,'ratio-bid_m0_m2'
        ,'deal-early-second_m2'
        ,'deal-price-avg_m2'
        ,'d-avg-low-price_m2'
        ,'d-increment-avg-low-price_m0_m2'

#         ,'datetime-curr_m3'
#         ,'datetime-prev_m3'
        ,'base-price15sec_m3'
        ,'increment-price_m3'
        ,'increment-price-target_m3'
        ,'increment-price-prev1sec_m3'
        ,'increment-price-prev2sec_m3'
        ,'increment-price-prev3sec_m3'
        ,'increment-price-prev4sec_m3'
        ,'increment-price-prev5sec_m3'
        ,'increment-price-prev6sec_m3'
        ,'increment-price-prev7sec_m3'
        ,'increment-price-prev8sec_m3'
        ,'increment-price-prev9sec_m3'
        ,'increment-price-prev10sec_m3'
        ,'increment-price-prev11sec_m3'
        ,'increment-price-prev12sec_m3'
        ,'increment-price-prev13sec_m3'
        ,'increment-price-prev14sec_m3'
        ,'increment-price-prev15sec_m3'
        ,'increment-price-mv2sec_m3'
        ,'increment-price-mv3sec_m3'
        ,'increment-price-mv4sec_m3'
        ,'increment-price-mv5sec_m3'
        ,'increment-price-mv6sec_m3'
        ,'increment-price-mv7sec_m3'
        ,'increment-price-mv8sec_m3'
        ,'increment-price-mv9sec_m3'
        ,'increment-price-mv10sec_m3'
        ,'increment-price-mv11sec_m3'
        ,'increment-price-mv12sec_m3'
        ,'increment-price-mv13sec_m3'
        ,'increment-price-mv14sec_m3'
        ,'increment-price-mv15sec_m3'
        ,'volume-plate_m0_m3'
        ,'ratio-bid_m0_m3'
        ,'deal-early-second_m3'
        ,'deal-price-avg_m3'
        ,'d-avg-low-price_m3'
        ,'d-increment-avg-low-price_m0_m3'
        ]]

X_col = X.columns # get the column list

# X = StandardScaler().fit_transform(X.as_matrix())
X = X.as_matrix()

# y = StandardScaler().fit_transform(df_wnv_raw[['increment-price-target']].as_matrix()).reshape(len(df_wnv_raw),)
y = df_history_ts_process[['increment-price-target']].as_matrix().reshape(len(df_history_ts_process),)

In [51]:
X_col


Out[51]:
Index(['month', 'second', 'base-price15sec', 'increment-price',
       'increment-price-prev1sec', 'increment-price-prev2sec',
       'increment-price-prev3sec', 'increment-price-prev4sec',
       'increment-price-prev5sec', 'increment-price-prev6sec',
       ...
       'increment-price-mv12sec_m3', 'increment-price-mv13sec_m3',
       'increment-price-mv14sec_m3', 'increment-price-mv15sec_m3',
       'volume-plate_m0_m3', 'ratio-bid_m0_m3', 'deal-early-second_m3',
       'deal-price-avg_m3', 'd-avg-low-price_m3',
       'd-increment-avg-low-price_m0_m3'],
      dtype='object', length=155)

In [52]:
plt.figure()
plt.plot(X)
plt.figure()
plt.plot(y)


Out[52]:
[<matplotlib.lines.Line2D at 0x7f51541d4eb8>]
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

[4] Evaluation

K-fold Cross-Validation


In [53]:
rng = check_random_state(0)

In [54]:
# GB
classifier_GB = GradientBoostingRegressor(n_estimators=1500, # score: 0.94608 (AUC 0.81419), learning_rate=0.001, max_features=8 <<< Best
#                                    loss='deviance',
#                                    subsample=1,
#                                    max_depth=5,
#                                    min_samples_split=20,
                                   learning_rate=0.002,
#                                    max_features=10,
                                   random_state=rng)

In [55]:
# AB
classifier_AB = AdaBoostRegressor(n_estimators=1500, # score: 0.93948 (AUC 0.88339), learning_rate=0.004 <<< Best
                                   learning_rate=0.002,
                                   random_state=rng)

In [56]:
# RF
classifier_RF = RandomForestRegressor(n_estimators=1500, # score: 0.94207 (AUC 0.81870), max_depth=3, min_samples_split=20, <<< Best
#                                     max_features=10,
#                                     max_depth=3,
#                                     min_samples_split=20,
                                    random_state=rng)

In [57]:
# ET
classifier_ET = ExtraTreesRegressor(n_estimators=1000, # score: 0.94655 (AUC 0.84364), max_depth=3, min_samples_split=20, max_features=10 <<< Best
#                                     max_depth=3,
#                                     min_samples_split=20,
#                                     max_features=10,
                                    random_state=rng)

In [58]:
# BG
classifier_BG = BaggingRegressor(n_estimators=500, # score: 0.70725 (AUC 0.63729) <<< Best
#                                     max_features=10,
                                    random_state=rng)

LR


In [59]:
classifier_LR = LinearRegression() # score: 0.90199 (AUC 0.80569)

SVM Linear


In [95]:
# classifier_SVCL = svm.SVC(kernel='linear', probability=True, random_state=rng) # score: 0.89976 (AUC 0.70524)
classifier_SVRL = svm.SVR(kernel='linear') # score: 0.89976 (AUC 0.70524)

SVM


In [61]:
classifier_SVRR = svm.SVR(kernel='rbf') # score: 0.80188 (AUC 0.50050)
# classifier_SVRR = svm.SVR(kernel='poly') # score: 0.80188 (AUC 0.50050)

KNN


In [62]:
classifier_KNN = KNeighborsRegressor(n_neighbors=2) # score: 0.94018 (AUC 0.72792)
cv = cross_val_score(classifier_KNN,
                            X,
                            y,
                            cv=StratifiedKFold(parm_ts_valid_month))
print('KNN CV score: {0:.5f}'.format(cv.mean()))


/home/user/env_py3/lib/python3.5/site-packages/sklearn/model_selection/_split.py:581: Warning: The least populated class in y has only 1 members, which is too few. The minimum number of groups for any class cannot be less than n_splits=27.
  % (min_groups, self.n_splits)), Warning)
KNN CV score: 0.97373

In [ ]:

Select Model


In [168]:
# classifier = classifier_GB     # 219.099617786
# classifier = classifier_AB     # 230.101439444
classifier = classifier_RF     # 197.955555556
# classifier = classifier_ET     # 
# classifier = classifier_BG     # 
# classifier = classifier_LR     # 
# classifier = classifier_SVRL   # 
# classifier = classifier_SVRR   #

Split Data


In [169]:
n_splits = parm_ts_valid_cycle
print('cycle seconds : %d' % n_splits)
# n_splits=54 # 19 seconds/records for each bidding month
# n_splits=19 # 19 seconds/records for each bidding month
n_fold = parm_ts_valid_month
print('cycle month   : %d' % n_fold)


# X_train_1 = X[0:(len(X)-batch*n_splits)]
# y_train_1 = y[0:(len(X)-batch*n_splits)]

# X_test_1 = X[(len(X)-batch*n_splits):((len(X)-batch*n_splits)+n_splits)]
# y_test_1 = y[(len(X)-batch*n_splits):((len(X)-batch*n_splits)+n_splits)]


cycle seconds : 39
cycle month   : 27

Cross-Validation


In [170]:
n_fold=7

In [171]:
y_pred = {}
y_test = {}

y_pred_org = {}
y_test_org = {}

i = 0
for batch in range(1, n_fold):
    X_train_1 = X[0:(len(X)-batch*n_splits)]
    y_train_1 = y[0:(len(X)-batch*n_splits)]
    X_test_1  = X[(len(X)-batch*n_splits):((len(X)-batch*n_splits)+n_splits)]
    y_test_1  = y[(len(X)-batch*n_splits):((len(X)-batch*n_splits)+n_splits)]
    print(len(X_train_1))
    
    # ReScale
    ScalerX = StandardScaler()
    ScalerX.fit(X_train_1)
    X_train_1 = ScalerX.transform(X_train_1)
    X_test_1  = ScalerX.transform(X_test_1)
    
    ScalerY = StandardScaler()
    ScalerY.fit(y_train_1.reshape(-1, 1))
    y_train_1 = ScalerY.transform(y_train_1.reshape(-1, 1))
    y_test_1  = ScalerY.transform(y_test_1.reshape(-1, 1))
    
    y_pred[i] = classifier.fit(X_train_1, y_train_1).predict(X_test_1)
    y_test[i] = y_test_1  

    y_pred_org[i] = ScalerY.inverse_transform(y_pred[i])
    y_test_org[i] = ScalerY.inverse_transform(y_test[i])
    
    plt.figure()
    plt.plot(y_train_1)
    plt.plot()
    plt.figure()
    plt.plot(y_test[i])
    plt.plot(y_pred[i])
    plt.plot()
    i += 1


1014
/home/user/env_py3/lib/python3.5/site-packages/sklearn/utils/validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
/home/user/env_py3/lib/python3.5/site-packages/ipykernel_launcher.py:26: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
975
/home/user/env_py3/lib/python3.5/site-packages/sklearn/utils/validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
/home/user/env_py3/lib/python3.5/site-packages/ipykernel_launcher.py:26: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
936
/home/user/env_py3/lib/python3.5/site-packages/sklearn/utils/validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
/home/user/env_py3/lib/python3.5/site-packages/ipykernel_launcher.py:26: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
897
/home/user/env_py3/lib/python3.5/site-packages/sklearn/utils/validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
/home/user/env_py3/lib/python3.5/site-packages/ipykernel_launcher.py:26: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
858
/home/user/env_py3/lib/python3.5/site-packages/sklearn/utils/validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
/home/user/env_py3/lib/python3.5/site-packages/ipykernel_launcher.py:26: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
819
/home/user/env_py3/lib/python3.5/site-packages/sklearn/utils/validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
/home/user/env_py3/lib/python3.5/site-packages/ipykernel_launcher.py:26: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

no inverse-scale


In [172]:
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test[i] - y_pred[i]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)


1.02095524945

[1.3526147745883981, 0.83484623472401887, 1.1677609760215473, 0.90333885828878691, 0.7580003419179886, 1.1091703111534197]

In [173]:
# 49~51 second predicts 56~58 second
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test[i][34:36] - y_pred[i][34:36]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)


0.540584602727

[0.6711926558055259, 0.74467007237690819, 0.37009688105586869, 0.3618985422471559, 0.61193362234624937, 0.4837158425323318]

inverse-scale


In [174]:
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test_org[i] - y_pred_org[i]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)


394.404499963

[515.83445101906671, 322.53916283146987, 449.10216962524771, 350.45395573088149, 296.87438088976592, 431.62287968441757]

In [175]:
# 49~51 second predicts 56~58 second
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test_org[i][34:36] - y_pred_org[i][34:36]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)


209.05

[255.96666666666363, 287.69999999999743, 142.33333333332848, 140.40000000001066, 239.66666666666231, 188.23333333333386]

In [176]:
# 48 second predicts 56 second
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test_org[i][33:34] - y_pred_org[i][33:34]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)


199.011111111

[233.59999999999513, 319.59999999999889, 106.39999999999441, 125.33333333334122, 325.1999999999997, 83.933333333334758]

In [177]:
# 49 second predicts 56 second
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test_org[i][34:35] - y_pred_org[i][34:35]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)


220.144444444

[281.26666666666301, 330.33333333333144, 173.33333333332803, 180.60000000000991, 205.19999999999527, 150.13333333333458]

In [178]:
# 50 second predicts 57 second
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test_org[i][35:36] - y_pred_org[i][35:36]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)


197.955555556

[230.66666666666424, 245.06666666666342, 111.33333333332894, 100.20000000001141, 274.13333333332935, 226.33333333333314]

In [179]:
# 51 second predicts 58 second
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test_org[i][36:37] - y_pred_org[i][36:37]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)


149.755555556

[152.06666666666615, 251.79999999999563, 132.66666666666765, 124.2000000000121, 92.133333333335713, 145.66666666666742]

In [180]:
# 52 second predicts 59 second
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test_org[i][37:38] - y_pred_org[i][37:38]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)


160.155555556

[192.60000000000105, 257.86666666666133, 77.733333333339942, 57.466666666678975, 164.53333333333489, 210.73333333333085]

In [181]:
# 53 second predicts 60 second
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test_org[i][38:39] - y_pred_org[i][38:39]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)


120.222222222

[158.06666666666843, 213.53333333332534, 40.06666666667752, 71.999999999987949, 78.399999999995771, 159.2666666666662]

In [182]:
plt.plot(y_test_org[0])
plt.plot(y_pred_org[0])


Out[182]:
[<matplotlib.lines.Line2D at 0x7f5153955400>]
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

In [183]:
plt.plot(k)


Out[183]:
[<matplotlib.lines.Line2D at 0x7f51530c4be0>]
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

In [ ]:


In [184]:
# plt.plot(df_history_ts_process['increment-price-target'][819:])
plt.plot(df_history_ts_process['increment-price'][819:])
plt.plot(df_history_ts_process['d-increment-avg-low-price_m0'][819:])
plt.plot(df_history_ts_process['increment-price'][819:] - df_history_ts_process['d-increment-avg-low-price_m0'][819:])
plt.figure()
plt.plot(df_history_ts_process['d-increment-avg-low-price_m0'][819:])
plt.plot(df_history_ts_process['d-increment-avg-low-price_m0_m1'][819:])
plt.plot(df_history_ts_process['d-increment-avg-low-price_m0_m2'][819:])
plt.plot(df_history_ts_process['d-increment-avg-low-price_m0_m3'][819:])


Out[184]:
[<matplotlib.lines.Line2D at 0x7f5152f98128>]
/home/user/env_py3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

Model Feature Importances:


In [133]:
def util_feature_importances(classifier):
    print(classifier)
    dict_importance ={}
    for i in range(len(X_col)):
        dict_importance[X_col[i]] = classifier.feature_importances_[i]
        dict_importance_sort = sorted(dict_importance.items(), key=operator.itemgetter(1), reverse=True)
    return dict_importance_sort

In [153]:
util_feature_importances(classifier_GB)


GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.002, loss='ls', max_depth=3,
             max_features=None, max_leaf_nodes=None,
             min_impurity_split=1e-07, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=1500, presort='auto',
             random_state=<mtrand.RandomState object at 0x7f5156f7fd38>,
             subsample=1.0, verbose=0, warm_start=False)
Out[153]:
[('d-increment-avg-low-price_m0', 0.34293549012727476),
 ('increment-price', 0.093617586399111191),
 ('second', 0.055744245486003534),
 ('increment-price-mv4sec', 0.043445928494641566),
 ('increment-price-mv2sec', 0.036846588194772198),
 ('d-increment-avg-low-price_m0_m1', 0.029019139891947588),
 ('increment-price-prev13sec_m2', 0.026642112163762371),
 ('increment-price-prev15sec', 0.023343725372192998),
 ('increment-price-mv5sec', 0.023076155426499524),
 ('d-avg-low-price_m2', 0.021947401417752199),
 ('increment-price-mv7sec', 0.020128288600561518),
 ('increment-price-mv3sec', 0.019346445029319537),
 ('increment-price-target_m1', 0.016701508972590678),
 ('increment-price-mv14sec', 0.016175748878961276),
 ('volume-plate_m0', 0.013644903262072721),
 ('increment-price-mv15sec', 0.013544236611419312),
 ('deal-price-avg_m2', 0.013346950586969712),
 ('deal-price-avg_m3', 0.013303658463784155),
 ('increment-price-prev1sec', 0.012341382385295672),
 ('deal-price-avg', 0.01148488572721886),
 ('ratio-bid_m0_m2', 0.011454804883250501),
 ('volume-plate_m0_m1', 0.010672982750998036),
 ('increment-price-mv6sec', 0.010194971854790562),
 ('increment-price_m2', 0.0087645448601464618),
 ('ratio-bid', 0.0084519439811856258),
 ('increment-price_m1', 0.0083487421993919623),
 ('d-increment-avg-low-price_m0_m2', 0.0083458742004151355),
 ('volume-plate', 0.0055839560486302181),
 ('deal-price-avg_m1', 0.0051691043460084874),
 ('increment-price-mv11sec', 0.0043488707037017665),
 ('increment-price-mv10sec', 0.0043338397554409259),
 ('deal-early-second', 0.0042623433782527868),
 ('increment-price-mv12sec_m2', 0.004223752022050541),
 ('ratio-bid_m0_m1', 0.0037495752753867683),
 ('deal-early-second_m2', 0.0036109727193235812),
 ('ratio-bid_m0', 0.0031947059804710402),
 ('increment-price-prev9sec', 0.0030827965963640029),
 ('base-price15sec_m2', 0.0030581052466973702),
 ('increment-price-mv7sec_m3', 0.0030128556612264027),
 ('base-price15sec_m3', 0.0026652212117559367),
 ('increment-price-prev15sec_m3', 0.0026170696689494979),
 ('increment-price-prev11sec_m1', 0.0024267960673887215),
 ('base-price15sec_m1', 0.0021004213382448808),
 ('volume-plate_m0_m2', 0.0020406700880926726),
 ('base-price15sec', 0.0020323502324012489),
 ('increment-price-prev9sec_m1', 0.0020058380485416033),
 ('increment-price-prev7sec', 0.0018895652603097496),
 ('deal-early-second_m3', 0.0015520171748478623),
 ('ratio-bid_m0_m3', 0.0013526462051937319),
 ('increment-price-prev2sec', 0.0011440577406955359),
 ('increment-price-mv10sec_m3', 0.0010028588058385605),
 ('increment-price-mv9sec', 0.00097324731656679535),
 ('increment-price-target_m3', 0.00092244535977213681),
 ('increment-price-prev4sec', 0.0008730701639199709),
 ('increment-price-mv6sec_m3', 0.00079820908886933705),
 ('increment-price-prev8sec', 0.0006328759781895158),
 ('increment-price-mv6sec_m2', 0.00063153244667253151),
 ('d-increment-avg-low-price_m0_m3', 0.00062209602774891844),
 ('increment-price-mv8sec', 0.0005974637784359263),
 ('increment-price-mv7sec_m2', 0.0005223195020197844),
 ('d-avg-low-price_m1', 0.00052004576180119928),
 ('increment-price-prev11sec_m2', 0.00051257599505423609),
 ('increment-price-mv12sec_m3', 0.00051057794878733636),
 ('increment-price-prev7sec_m3', 0.00045571355861293266),
 ('increment-price-prev10sec', 0.0004275539131870762),
 ('increment-price-prev9sec_m3', 0.00042244933072896313),
 ('increment-price-mv10sec_m1', 0.00041796770517011674),
 ('increment-price-prev14sec', 0.00038972201274172744),
 ('increment-price-prev13sec', 0.0003548364610088449),
 ('increment-price-mv12sec', 0.00035218841257563769),
 ('increment-price-prev8sec_m3', 0.00034009910597776363),
 ('increment-price-prev5sec', 0.00032809249434366078),
 ('increment-price-mv11sec_m3', 0.00032302060399291675),
 ('increment-price-prev6sec_m2', 0.0003045638096126826),
 ('month', 0.00027425984121313073),
 ('increment-price-mv13sec', 0.00026703929171773336),
 ('increment-price-prev12sec', 0.00026651937624310189),
 ('increment-price-mv13sec_m3', 0.00026059821174144584),
 ('increment-price-mv14sec_m3', 0.00025116904827436609),
 ('increment-price-prev11sec', 0.00020343724708123679),
 ('d-avg-low-price_m3', 0.00018237587463156826),
 ('increment-price-prev10sec_m2', 0.00017658299165475196),
 ('increment-price-mv9sec_m3', 0.00015183120015108595),
 ('increment-price-mv9sec_m2', 0.00013730710786966557),
 ('increment-price-mv15sec_m3', 0.00012279379868338156),
 ('increment-price-mv14sec_m2', 0.00011551342684887069),
 ('increment-price-mv8sec_m3', 0.00011337056523317496),
 ('increment-price-mv11sec_m2', 0.0001121415195227957),
 ('increment-price-prev3sec', 0.00011042259338543166),
 ('increment-price-mv7sec_m1', 9.5515359574179428e-05),
 ('volume-plate_m0_m3', 8.6033914308860618e-05),
 ('d-avg-low-price', 8.4845421958018959e-05),
 ('increment-price-prev14sec_m2', 8.3330831906989913e-05),
 ('increment-price-mv15sec_m2', 8.3239394008173927e-05),
 ('increment-price-mv6sec_m1', 7.9589219429141225e-05),
 ('increment-price-mv15sec_m1', 7.7630608272147888e-05),
 ('increment-price-mv14sec_m1', 7.5250723563739414e-05),
 ('increment-price-mv8sec_m2', 7.5112616140031785e-05),
 ('increment-price-mv12sec_m1', 7.4563242970898878e-05),
 ('increment-price-prev15sec_m2', 7.3674987265851536e-05),
 ('increment-price-prev8sec_m1', 6.1454116688589833e-05),
 ('increment-price-mv11sec_m1', 5.969287692818918e-05),
 ('increment-price-mv13sec_m1', 5.8557080104030046e-05),
 ('increment-price-mv9sec_m1', 5.4718046892375156e-05),
 ('increment-price-mv8sec_m1', 4.9985170332368683e-05),
 ('increment-price-prev10sec_m1', 4.9033106001829604e-05),
 ('increment-price-prev5sec_m1', 4.7321157766172518e-05),
 ('increment-price-mv5sec_m1', 4.4998750499447296e-05),
 ('increment-price-prev8sec_m2', 4.0534821937477046e-05),
 ('increment-price-mv5sec_m2', 3.7117137455092817e-05),
 ('increment-price-mv3sec_m1', 3.6955627448736537e-05),
 ('increment-price-mv4sec_m2', 3.685354907896097e-05),
 ('increment-price-mv2sec_m1', 3.6483492085740329e-05),
 ('increment-price-prev6sec', 3.5926754745221956e-05),
 ('increment-price-mv4sec_m1', 3.317276587892514e-05),
 ('increment-price-prev3sec_m1', 2.9905844084448749e-05),
 ('deal-early-second_m1', 2.6220329283336925e-05),
 ('increment-price-mv2sec_m2', 2.4632870338742239e-05),
 ('increment-price-mv13sec_m2', 2.1388301096560156e-05),
 ('increment-price-mv10sec_m2', 1.7597213770156783e-05),
 ('increment-price-prev7sec_m2', 0.0),
 ('increment-price-mv3sec_m2', 0.0),
 ('increment-price-prev4sec_m3', 0.0),
 ('increment-price-prev4sec_m2', 0.0),
 ('increment-price_m3', 0.0),
 ('increment-price-prev12sec_m2', 0.0),
 ('increment-price-prev14sec_m3', 0.0),
 ('increment-price-prev11sec_m3', 0.0),
 ('increment-price-prev3sec_m3', 0.0),
 ('increment-price-prev13sec_m1', 0.0),
 ('increment-price-mv3sec_m3', 0.0),
 ('increment-price-prev5sec_m2', 0.0),
 ('increment-price-prev13sec_m3', 0.0),
 ('increment-price-prev12sec_m1', 0.0),
 ('increment-price-prev1sec_m3', 0.0),
 ('increment-price-mv2sec_m3', 0.0),
 ('increment-price-prev12sec_m3', 0.0),
 ('increment-price-prev5sec_m3', 0.0),
 ('increment-price-prev10sec_m3', 0.0),
 ('increment-price-prev2sec_m2', 0.0),
 ('increment-price-prev9sec_m2', 0.0),
 ('increment-price-prev14sec_m1', 0.0),
 ('increment-price-prev4sec_m1', 0.0),
 ('increment-price-prev1sec_m1', 0.0),
 ('increment-price-prev6sec_m1', 0.0),
 ('increment-price-prev3sec_m2', 0.0),
 ('increment-price-target_m2', 0.0),
 ('increment-price-prev6sec_m3', 0.0),
 ('increment-price-prev7sec_m1', 0.0),
 ('increment-price-prev15sec_m1', 0.0),
 ('increment-price-prev2sec_m1', 0.0),
 ('increment-price-prev1sec_m2', 0.0),
 ('increment-price-prev2sec_m3', 0.0),
 ('increment-price-mv4sec_m3', 0.0),
 ('increment-price-mv5sec_m3', 0.0)]

In [185]:
util_feature_importances(classifier_RF)


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=1500, n_jobs=1, oob_score=False,
           random_state=<mtrand.RandomState object at 0x7f5156f7fd38>,
           verbose=0, warm_start=False)
Out[185]:
[('d-increment-avg-low-price_m0', 0.68073389360881387),
 ('increment-price', 0.075237201884603325),
 ('increment-price-mv3sec', 0.042607274838927948),
 ('increment-price-target_m1', 0.036337292275652737),
 ('second', 0.028474339171678832),
 ('increment-price-mv14sec', 0.014261681542255798),
 ('increment-price-mv15sec', 0.012346371646694112),
 ('increment-price-prev1sec', 0.0070983575032114825),
 ('increment-price-mv2sec', 0.0059825853995778737),
 ('increment-price_m1', 0.0052155308681012674),
 ('d-increment-avg-low-price_m0_m2', 0.0047248690618915973),
 ('increment-price-mv5sec', 0.0045377704864446736),
 ('increment-price-mv4sec', 0.0045239319855392476),
 ('d-increment-avg-low-price_m0_m1', 0.0041883791977969766),
 ('increment-price-mv11sec', 0.0036741568314661048),
 ('increment-price-mv6sec', 0.0031497169748871253),
 ('increment-price-mv7sec', 0.0027828431535250235),
 ('deal-price-avg_m2', 0.0023944853187150279),
 ('deal-price-avg_m3', 0.002014658901805948),
 ('increment-price-mv8sec', 0.0019982669403534117),
 ('d-avg-low-price_m2', 0.0018923729648450086),
 ('ratio-bid_m0_m1', 0.0017418459456098572),
 ('increment-price-mv9sec', 0.0017344562937251509),
 ('increment-price-prev15sec', 0.0016752573961927306),
 ('increment-price-mv10sec', 0.0014084715734418826),
 ('base-price15sec_m3', 0.0012640411591565948),
 ('deal-price-avg_m1', 0.0011788894324284477),
 ('ratio-bid', 0.0010801704395059671),
 ('base-price15sec_m2', 0.00098470001345279706),
 ('increment-price-prev2sec', 0.00096798196060162713),
 ('deal-early-second_m2', 0.0009533582882351859),
 ('increment-price-mv15sec_m3', 0.00091847301892948065),
 ('volume-plate_m0_m1', 0.00089966123967609457),
 ('ratio-bid_m0_m2', 0.00089563921679118824),
 ('volume-plate_m0', 0.00087151914214459304),
 ('increment-price-prev4sec', 0.00086266668205245227),
 ('increment-price-prev7sec', 0.00085053129210087568),
 ('volume-plate', 0.00085047061506220135),
 ('deal-price-avg', 0.00083831778963284814),
 ('base-price15sec_m1', 0.00080984515175093258),
 ('increment-price-prev5sec', 0.00079935059682807567),
 ('increment-price-target_m3', 0.00078042589259974743),
 ('ratio-bid_m0', 0.00077993927142979227),
 ('d-increment-avg-low-price_m0_m3', 0.0007580307914234239),
 ('deal-early-second', 0.00074427676476980932),
 ('increment-price-mv15sec_m2', 0.00073238697153937772),
 ('increment-price-mv13sec', 0.00069671653130077605),
 ('volume-plate_m0_m2', 0.00067333810571299337),
 ('increment-price-mv14sec_m3', 0.00066729843684709483),
 ('increment-price_m2', 0.00064704031707297539),
 ('increment-price-mv12sec', 0.00064225595773194084),
 ('increment-price-prev15sec_m3', 0.00062352387957134161),
 ('increment-price-prev6sec', 0.00060377810537261202),
 ('d-avg-low-price_m1', 0.00060089995780294254),
 ('increment-price-mv9sec_m3', 0.00059350271645355012),
 ('month', 0.000566598250910967),
 ('increment-price-target_m2', 0.00054849385160641326),
 ('increment-price-mv12sec_m3', 0.00052995535641339178),
 ('increment-price-mv14sec_m2', 0.00049464419439086053),
 ('increment-price-prev8sec', 0.00048597066492639461),
 ('increment-price-prev13sec_m2', 0.00047528844328196228),
 ('increment-price-mv11sec_m3', 0.00046855505005045619),
 ('ratio-bid_m0_m3', 0.00046262852601592979),
 ('increment-price-prev9sec', 0.0004520563299130178),
 ('increment-price-mv13sec_m3', 0.00044686997714834051),
 ('increment-price-prev3sec', 0.00043587076738588665),
 ('base-price15sec', 0.00041956634212909129),
 ('increment-price-prev10sec', 0.00041019390863062711),
 ('increment-price-prev6sec_m3', 0.00040182723177282782),
 ('d-avg-low-price', 0.0003989591448200256),
 ('increment-price-mv8sec_m3', 0.00037649728664578799),
 ('increment-price-prev14sec', 0.00037579667150336975),
 ('deal-early-second_m1', 0.00037006345908156341),
 ('increment-price-mv10sec_m3', 0.00036472478203792571),
 ('increment-price-prev15sec_m2', 0.00035659066732975542),
 ('increment-price-mv10sec_m2', 0.00035068541839788242),
 ('increment-price-mv13sec_m2', 0.00035000669183584256),
 ('increment-price-mv12sec_m2', 0.00034934951117034996),
 ('increment-price-mv7sec_m3', 0.00034750427579168029),
 ('increment-price-mv2sec_m2', 0.00034481916613272119),
 ('volume-plate_m0_m3', 0.00033497271074749623),
 ('increment-price-mv5sec_m2', 0.00033382878873831911),
 ('increment-price-prev14sec_m2', 0.00033225457712942425),
 ('increment-price-mv9sec_m2', 0.0003208210124593859),
 ('increment-price-prev14sec_m3', 0.00031722464905675806),
 ('increment-price-mv11sec_m2', 0.00031146120736533705),
 ('increment-price-mv2sec_m1', 0.00030453725009148242),
 ('increment-price-mv15sec_m1', 0.0003018683529951798),
 ('increment-price-mv4sec_m1', 0.00030101531100126371),
 ('increment-price-mv6sec_m3', 0.00028470942169548231),
 ('increment-price-mv3sec_m1', 0.00027292085476000888),
 ('increment-price-prev12sec_m2', 0.00027021339643640096),
 ('increment-price-prev1sec_m1', 0.00027010110044044006),
 ('increment-price-mv4sec_m2', 0.00025411509086012575),
 ('increment-price-prev13sec', 0.00025332375554954785),
 ('increment-price-mv7sec_m2', 0.00025147643545714098),
 ('d-avg-low-price_m3', 0.00024875876960852314),
 ('increment-price-mv8sec_m2', 0.00024601568886018714),
 ('increment-price-mv10sec_m1', 0.00024300880467761644),
 ('increment-price-mv14sec_m1', 0.00023930669892502528),
 ('increment-price-mv5sec_m1', 0.00023773910171424222),
 ('deal-early-second_m3', 0.00023399979075254196),
 ('increment-price-prev11sec', 0.00023243425290739487),
 ('increment-price-mv11sec_m1', 0.00022596947838701535),
 ('increment-price-mv13sec_m1', 0.0002229863741204767),
 ('increment-price-mv8sec_m1', 0.0002227679310184312),
 ('increment-price-prev12sec', 0.00022221547603823514),
 ('increment-price-mv7sec_m1', 0.00022187003029908625),
 ('increment-price-mv5sec_m3', 0.00021889093851815792),
 ('increment-price-prev11sec_m2', 0.00021618009199091939),
 ('increment-price-mv6sec_m2', 0.00021605679677255888),
 ('increment-price-prev1sec_m2', 0.00021547089423477075),
 ('increment-price-mv4sec_m3', 0.00021248763785567578),
 ('increment-price-mv9sec_m1', 0.00020658311738420588),
 ('increment-price-prev13sec_m3', 0.0002065132407654604),
 ('increment-price-mv6sec_m1', 0.00020502070030668351),
 ('increment-price-prev5sec_m2', 0.0002030833577655085),
 ('increment-price-mv3sec_m2', 0.00019842550455531133),
 ('increment-price-mv12sec_m1', 0.00019425012165832592),
 ('increment-price-prev5sec_m3', 0.00018261796070875166),
 ('increment-price-prev8sec_m1', 0.00017573862063302977),
 ('increment-price-prev9sec_m1', 0.00017223038733174678),
 ('increment-price-prev8sec_m2', 0.00016756413805552381),
 ('increment-price-prev7sec_m3', 0.00016614988085056351),
 ('increment-price-prev6sec_m2', 0.00016259182392001608),
 ('increment-price-prev10sec_m2', 0.00016070282841195842),
 ('increment-price-prev7sec_m2', 0.00015580682551308341),
 ('increment-price-prev9sec_m2', 0.00015389482723794876),
 ('increment-price-prev10sec_m3', 0.00014390662369299874),
 ('increment-price-mv3sec_m3', 0.00014082440848441953),
 ('increment-price-prev12sec_m1', 0.00014020473996459462),
 ('increment-price-prev11sec_m1', 0.00012884399440382796),
 ('increment-price-mv2sec_m3', 0.00012828386760496251),
 ('increment-price_m3', 0.00012564697092266186),
 ('increment-price-prev5sec_m1', 0.00012429710209826042),
 ('increment-price-prev4sec_m3', 0.0001241688284783009),
 ('increment-price-prev8sec_m3', 0.00012341481467952644),
 ('increment-price-prev9sec_m3', 0.00011044242960650262),
 ('increment-price-prev13sec_m1', 0.00010936807404922712),
 ('increment-price-prev6sec_m1', 0.0001076214814267206),
 ('increment-price-prev11sec_m3', 0.00010630902372852263),
 ('increment-price-prev2sec_m1', 0.00010239040101680807),
 ('increment-price-prev3sec_m1', 0.00010100705174989772),
 ('increment-price-prev10sec_m1', 0.00010051483239171249),
 ('increment-price-prev12sec_m3', 9.8073751203979563e-05),
 ('increment-price-prev7sec_m1', 9.7633040234349454e-05),
 ('increment-price-prev4sec_m2', 9.7585998128928543e-05),
 ('increment-price-prev15sec_m1', 9.1906509307413948e-05),
 ('increment-price-prev1sec_m3', 8.8415312960383641e-05),
 ('increment-price-prev14sec_m1', 8.4147246925949339e-05),
 ('increment-price-prev2sec_m2', 8.2559481878877181e-05),
 ('increment-price-prev2sec_m3', 7.5791247784836845e-05),
 ('increment-price-prev4sec_m1', 6.3244648811097939e-05),
 ('increment-price-prev3sec_m3', 5.7275926899550334e-05),
 ('increment-price-prev3sec_m2', 5.5388711944260888e-05)]

In [135]:
util_feature_importances(classifier_AB)


AdaBoostRegressor(base_estimator=None, learning_rate=0.002, loss='linear',
         n_estimators=1500,
         random_state=<mtrand.RandomState object at 0x7f5156f7fd38>)
Out[135]:
[('d-increment-avg-low-price_m0', 0.66675959482680225),
 ('increment-price', 0.16580713923465404),
 ('increment-price-mv3sec', 0.038439823196958559),
 ('increment-price-target_m1', 0.029245799122529205),
 ('second', 0.016244651378878906),
 ('increment-price-mv14sec', 0.0086764788182639649),
 ('increment-price-mv4sec', 0.0086225163674565914),
 ('increment-price-mv5sec', 0.0084813524787433897),
 ('increment-price-mv2sec', 0.0064848471798301032),
 ('increment-price_m1', 0.0062163388350592865),
 ('increment-price-prev1sec', 0.0057369680741931762),
 ('increment-price-mv15sec', 0.0052024435708841691),
 ('increment-price-mv6sec', 0.0051296031000796233),
 ('increment-price-mv7sec', 0.0033231496126610968),
 ('d-increment-avg-low-price_m0_m1', 0.0026369797058024983),
 ('d-increment-avg-low-price_m0_m2', 0.0025948422519552101),
 ('increment-price-mv11sec', 0.0021783428768542184),
 ('increment-price-mv8sec', 0.0019220051822198961),
 ('increment-price-prev5sec', 0.0015081071299385968),
 ('increment-price-mv9sec', 0.0012986215017356821),
 ('increment-price-prev2sec', 0.0011314604481721939),
 ('increment-price-mv10sec', 0.001000808743919382),
 ('increment-price-prev15sec_m3', 0.00094969813580816312),
 ('increment-price-prev4sec', 0.00091079103990560583),
 ('increment-price-prev10sec', 0.00086409493396370719),
 ('increment-price-prev7sec', 0.00080425146477928794),
 ('deal-price-avg_m3', 0.00080398529869627562),
 ('increment-price-mv13sec', 0.00069132720553609925),
 ('increment-price-prev3sec', 0.00064066042169357137),
 ('increment-price_m2', 0.0005873357035237652),
 ('increment-price-prev6sec', 0.00054175665411987633),
 ('increment-price-prev9sec', 0.00041297290732223254),
 ('increment-price-prev8sec', 0.00038397355337097623),
 ('increment-price-prev15sec', 0.0003256300454736719),
 ('increment-price-mv12sec', 0.00031982797492560846),
 ('increment-price-prev14sec', 0.00031210469117095744),
 ('increment-price-prev11sec', 0.00020247435207676955),
 ('increment-price-mv2sec_m2', 0.00019887936651935979),
 ('ratio-bid_m0_m2', 0.00017908798048181168),
 ('increment-price-prev1sec_m1', 0.00016405003862941935),
 ('increment-price-prev13sec', 0.00016319855197236723),
 ('increment-price-prev1sec_m2', 0.00016046243055248862),
 ('increment-price-mv15sec_m2', 0.00014901136304625005),
 ('increment-price-mv2sec_m1', 0.00013950075433606703),
 ('increment-price-prev12sec', 0.00012965426988234668),
 ('increment-price-mv14sec_m2', 0.000109973653894927),
 ('increment-price-mv14sec_m3', 9.7557083153565754e-05),
 ('increment-price-mv12sec_m2', 8.8917605439888696e-05),
 ('increment-price-mv13sec_m2', 7.8820656771996591e-05),
 ('increment-price-mv11sec_m2', 6.7222926935409059e-05),
 ('increment-price-target_m3', 5.6621426549904487e-05),
 ('increment-price-prev14sec_m3', 4.8333727353181414e-05),
 ('increment-price-mv3sec_m2', 4.6234586459472583e-05),
 ('increment-price-mv15sec_m3', 4.6056331670719407e-05),
 ('increment-price-prev13sec_m3', 4.1343010152587857e-05),
 ('deal-price-avg_m2', 4.0094267668926648e-05),
 ('increment-price-mv9sec_m2', 3.6585028512199684e-05),
 ('increment-price-mv13sec_m3', 3.6564762514650011e-05),
 ('increment-price-mv5sec_m1', 3.5034629920539297e-05),
 ('ratio-bid_m0', 3.3128572960170362e-05),
 ('increment-price-mv5sec_m2', 3.1959571108092536e-05),
 ('base-price15sec', 3.1360130045059944e-05),
 ('increment-price-mv9sec_m3', 3.0131583216542806e-05),
 ('increment-price-prev8sec_m2', 2.4457286463642249e-05),
 ('increment-price-mv10sec_m2', 2.333522726335631e-05),
 ('increment-price-prev14sec_m2', 2.0979404425203277e-05),
 ('increment-price-prev6sec_m2', 2.0545930921432433e-05),
 ('increment-price-mv4sec_m1', 2.0159471287142614e-05),
 ('increment-price-mv12sec_m3', 1.940681916074662e-05),
 ('increment-price-prev4sec_m1', 1.5514199923696238e-05),
 ('ratio-bid_m0_m1', 1.5119847021273663e-05),
 ('increment-price-prev12sec_m2', 1.4749343400227149e-05),
 ('increment-price-prev7sec_m1', 1.3994095849113965e-05),
 ('increment-price-mv11sec_m1', 1.2643247492377694e-05),
 ('increment-price-mv6sec_m1', 1.163216682308942e-05),
 ('increment-price-prev10sec_m2', 1.0366720537087934e-05),
 ('increment-price-mv8sec_m2', 1.0090148622393482e-05),
 ('increment-price-mv10sec_m3', 9.6419474704998515e-06),
 ('increment-price-prev7sec_m3', 9.5466962116277703e-06),
 ('increment-price-mv3sec_m1', 9.4295966601753812e-06),
 ('increment-price-target_m2', 9.2236195811474818e-06),
 ('volume-plate_m0_m1', 8.5879229932658517e-06),
 ('ratio-bid_m0_m3', 8.5294104308179661e-06),
 ('increment-price-prev5sec_m2', 7.3156827037291028e-06),
 ('increment-price-prev4sec_m2', 6.8546502968908566e-06),
 ('increment-price-mv6sec_m2', 6.8361922317545549e-06),
 ('increment-price-mv5sec_m3', 6.7777719804121477e-06),
 ('increment-price-mv4sec_m2', 6.7174039271532331e-06),
 ('increment-price-prev9sec_m2', 5.4149284615313195e-06),
 ('increment-price-prev11sec_m2', 5.3321996632558985e-06),
 ('increment-price-prev8sec_m1', 5.2566662469251825e-06),
 ('increment-price-mv4sec_m3', 4.9140934436632498e-06),
 ('deal-price-avg', 4.6112810846246112e-06),
 ('volume-plate_m0', 3.776894520713509e-06),
 ('increment-price-mv11sec_m3', 3.6246652556973996e-06),
 ('increment-price-prev13sec_m2', 3.1987829073630274e-06),
 ('month', 3.1528865542205255e-06),
 ('increment-price-mv7sec_m3', 2.9781135367353346e-06),
 ('ratio-bid', 2.7423549397846671e-06),
 ('increment-price-prev7sec_m2', 0.0),
 ('volume-plate_m0_m2', 0.0),
 ('increment-price-mv7sec_m1', 0.0),
 ('increment-price-prev4sec_m3', 0.0),
 ('increment-price-prev9sec_m1', 0.0),
 ('increment-price-prev10sec_m1', 0.0),
 ('increment-price_m3', 0.0),
 ('increment-price-prev3sec_m1', 0.0),
 ('increment-price-mv12sec_m1', 0.0),
 ('deal-early-second_m2', 0.0),
 ('increment-price-prev11sec_m3', 0.0),
 ('increment-price-prev3sec_m3', 0.0),
 ('increment-price-prev13sec_m1', 0.0),
 ('increment-price-mv3sec_m3', 0.0),
 ('base-price15sec_m2', 0.0),
 ('increment-price-mv9sec_m1', 0.0),
 ('d-increment-avg-low-price_m0_m3', 0.0),
 ('increment-price-mv7sec_m2', 0.0),
 ('increment-price-mv13sec_m1', 0.0),
 ('increment-price-prev12sec_m1', 0.0),
 ('increment-price-mv8sec_m1', 0.0),
 ('increment-price-prev1sec_m3', 0.0),
 ('increment-price-mv2sec_m3', 0.0),
 ('increment-price-prev12sec_m3', 0.0),
 ('increment-price-prev11sec_m1', 0.0),
 ('increment-price-mv10sec_m1', 0.0),
 ('d-avg-low-price_m3', 0.0),
 ('increment-price-prev5sec_m3', 0.0),
 ('increment-price-prev10sec_m3', 0.0),
 ('deal-early-second_m1', 0.0),
 ('increment-price-prev2sec_m2', 0.0),
 ('increment-price-mv6sec_m3', 0.0),
 ('increment-price-mv14sec_m1', 0.0),
 ('increment-price-prev8sec_m3', 0.0),
 ('increment-price-prev14sec_m1', 0.0),
 ('d-avg-low-price', 0.0),
 ('d-avg-low-price_m2', 0.0),
 ('increment-price-prev9sec_m3', 0.0),
 ('volume-plate', 0.0),
 ('increment-price-prev5sec_m1', 0.0),
 ('increment-price-prev15sec_m2', 0.0),
 ('base-price15sec_m1', 0.0),
 ('increment-price-prev6sec_m1', 0.0),
 ('increment-price-prev3sec_m2', 0.0),
 ('volume-plate_m0_m3', 0.0),
 ('increment-price-prev6sec_m3', 0.0),
 ('increment-price-mv15sec_m1', 0.0),
 ('base-price15sec_m3', 0.0),
 ('increment-price-prev15sec_m1', 0.0),
 ('d-avg-low-price_m1', 0.0),
 ('deal-early-second_m3', 0.0),
 ('increment-price-prev2sec_m1', 0.0),
 ('increment-price-mv8sec_m3', 0.0),
 ('increment-price-prev2sec_m3', 0.0),
 ('deal-price-avg_m1', 0.0),
 ('deal-early-second', 0.0)]

In [134]:
util_feature_importances(classifier_ET)


ExtraTreesRegressor(bootstrap=False, criterion='mse', max_depth=None,
          max_features='auto', max_leaf_nodes=None,
          min_impurity_split=1e-07, min_samples_leaf=1,
          min_samples_split=2, min_weight_fraction_leaf=0.0,
          n_estimators=1000, n_jobs=1, oob_score=False,
          random_state=<mtrand.RandomState object at 0x7f5156f7fd38>,
          verbose=0, warm_start=False)
Out[134]:
[('increment-price', 0.18022141975221273),
 ('d-increment-avg-low-price_m0', 0.10148928083402267),
 ('increment-price-prev1sec', 0.087181545000321117),
 ('increment-price-mv2sec', 0.067570085794973839),
 ('increment-price-mv3sec', 0.05591051898214712),
 ('increment-price-prev2sec', 0.050073010425833002),
 ('increment-price-mv4sec', 0.042508145242252653),
 ('increment-price-mv5sec', 0.038899493295749613),
 ('second', 0.035023489224558432),
 ('increment-price-prev3sec', 0.032418096986983599),
 ('increment-price-mv6sec', 0.03170750987725407),
 ('increment-price-mv7sec', 0.030346051414781569),
 ('increment-price-mv8sec', 0.022260165194918365),
 ('increment-price-mv9sec', 0.019650703916101651),
 ('increment-price-target_m1', 0.014703287262900919),
 ('increment-price-mv10sec', 0.014394109442971946),
 ('increment-price_m1', 0.014161736100746837),
 ('increment-price-prev4sec', 0.013567044924402225),
 ('increment-price-mv11sec', 0.012939797538757999),
 ('increment-price-mv12sec', 0.0074606205463557881),
 ('increment-price-prev5sec', 0.0053456315083065262),
 ('increment-price-mv14sec', 0.0052583867550131495),
 ('increment-price-mv13sec', 0.005213847802511256),
 ('increment-price-mv15sec', 0.0047634694882445613),
 ('increment-price-prev2sec_m1', 0.0047376986773605065),
 ('increment-price-mv2sec_m1', 0.004547512412434008),
 ('increment-price-prev1sec_m1', 0.0039878747534452122),
 ('increment-price_m2', 0.0036157482493104224),
 ('increment-price-prev15sec', 0.0031503113168490893),
 ('d-increment-avg-low-price_m0_m1', 0.0030341197736135257),
 ('increment-price-target_m2', 0.0025011325491625862),
 ('increment-price-mv3sec_m1', 0.0023609980844208111),
 ('increment-price-target_m3', 0.0023454763504656352),
 ('deal-early-second_m2', 0.0023317155246399767),
 ('increment-price-prev6sec', 0.0022203501901027962),
 ('increment-price-prev1sec_m2', 0.0020808053395792122),
 ('ratio-bid', 0.0020586340734978937),
 ('increment-price-prev7sec', 0.001962354781064483),
 ('deal-price-avg_m2', 0.0017769345440980444),
 ('base-price15sec_m3', 0.0017671902612357915),
 ('increment-price-mv2sec_m2', 0.0015781890026411553),
 ('d-increment-avg-low-price_m0_m2', 0.0015338785872597023),
 ('deal-price-avg_m3', 0.0014826739423022205),
 ('volume-plate_m0_m2', 0.001471444009630656),
 ('d-avg-low-price_m2', 0.0014547181731138152),
 ('increment-price-prev14sec', 0.0013467944831783195),
 ('ratio-bid_m0_m1', 0.0013094595343186171),
 ('base-price15sec_m1', 0.0010895064161805469),
 ('increment-price-prev2sec_m2', 0.0010617891236381161),
 ('volume-plate_m0_m1', 0.0010380824920448598),
 ('volume-plate_m0', 0.0010000428813156019),
 ('increment-price-mv3sec_m2', 0.00095946555693430505),
 ('increment-price-prev13sec_m2', 0.00093343509797996066),
 ('base-price15sec', 0.00091073544936651305),
 ('month', 0.00090564515194082435),
 ('deal-price-avg', 0.00090138825104962102),
 ('increment-price-mv5sec_m1', 0.0008157642965925286),
 ('increment-price-prev3sec_m1', 0.00081433423868655912),
 ('increment-price-prev14sec_m2', 0.00079836350462281333),
 ('increment-price-mv6sec_m1', 0.00079494249205674722),
 ('increment-price-prev12sec_m2', 0.00078277928156213954),
 ('volume-plate', 0.00077997602760520701),
 ('increment-price-prev9sec', 0.00077843488175570908),
 ('increment-price-prev15sec_m2', 0.00077700723207897921),
 ('increment-price-prev15sec_m3', 0.00076827278849221029),
 ('base-price15sec_m2', 0.00072190171231198874),
 ('increment-price-prev8sec', 0.00070100947680227133),
 ('increment-price-mv4sec_m2', 0.00070022230884673044),
 ('increment-price-prev11sec_m2', 0.00069441943822424654),
 ('ratio-bid_m0_m2', 0.00068408763507321743),
 ('deal-price-avg_m1', 0.000670223233834791),
 ('ratio-bid_m0', 0.00066672046864562414),
 ('increment-price-mv4sec_m1', 0.00063976964598683021),
 ('increment-price-prev10sec_m2', 0.00062740275602357859),
 ('increment-price-prev4sec_m3', 0.00059991798954623211),
 ('increment-price-prev13sec', 0.00059353405731385435),
 ('volume-plate_m0_m3', 0.00058469600089664517),
 ('increment-price-prev10sec', 0.0005564148512928282),
 ('d-avg-low-price_m1', 0.00055239748868112957),
 ('increment-price-mv9sec_m3', 0.00054582263170248682),
 ('increment-price-mv13sec_m3', 0.00054137985354018041),
 ('increment-price-mv10sec_m3', 0.00053908461580338127),
 ('d-avg-low-price', 0.00051711943297796231),
 ('deal-early-second', 0.00051531160896069303),
 ('increment-price-prev6sec_m2', 0.00050254474962323767),
 ('increment-price-mv7sec_m3', 0.00049605714950652288),
 ('increment-price-prev3sec_m2', 0.00048591515136421002),
 ('ratio-bid_m0_m3', 0.00048565950945089921),
 ('d-increment-avg-low-price_m0_m3', 0.00047398419097550662),
 ('increment-price-mv11sec_m3', 0.00047349386196756613),
 ('increment-price-mv5sec_m2', 0.00046269759705009608),
 ('increment-price-mv5sec_m3', 0.00046265101789770525),
 ('deal-early-second_m1', 0.0004573948552650784),
 ('increment-price-mv6sec_m3', 0.00045533044881907147),
 ('increment-price-mv8sec_m3', 0.00045297538242007016),
 ('increment-price-mv11sec_m2', 0.00044976420531731622),
 ('increment-price-mv15sec_m3', 0.00044051090458269073),
 ('increment-price-prev12sec', 0.0004386386646709237),
 ('increment-price-mv14sec_m2', 0.00043814380286039264),
 ('increment-price-mv12sec_m3', 0.00043458563369420255),
 ('increment-price-mv14sec_m3', 0.00042785877712966057),
 ('increment-price-prev7sec_m2', 0.00042569866379151819),
 ('increment-price-prev2sec_m3', 0.00042031682862345528),
 ('increment-price-mv12sec_m2', 0.00042005876370459561),
 ('increment-price-prev14sec_m3', 0.00041123484197987386),
 ('increment-price-prev5sec_m3', 0.00040433588150278231),
 ('increment-price-mv15sec_m2', 0.00040185502145948014),
 ('increment-price-prev7sec_m3', 0.00039359506507417386),
 ('increment-price-prev9sec_m2', 0.00038481713116677213),
 ('increment-price-prev4sec_m2', 0.00038288449179727657),
 ('increment-price-mv9sec_m2', 0.00038107745221196634),
 ('increment-price_m3', 0.00038075731191583947),
 ('increment-price-mv4sec_m3', 0.00037497704605571049),
 ('increment-price-prev8sec_m1', 0.00037066881754119207),
 ('increment-price-prev15sec_m1', 0.00036932755481277387),
 ('increment-price-mv8sec_m2', 0.00036775077407097412),
 ('increment-price-prev6sec_m3', 0.00036514825541463777),
 ('increment-price-prev8sec_m2', 0.00036354838613319443),
 ('increment-price-mv13sec_m2', 0.00036353871582024732),
 ('increment-price-prev13sec_m1', 0.00035713973886455913),
 ('increment-price-mv10sec_m2', 0.00034485734160249229),
 ('increment-price-prev12sec_m1', 0.00034032197982917459),
 ('increment-price-mv6sec_m2', 0.00033402919256113708),
 ('increment-price-mv2sec_m3', 0.00032818673268886911),
 ('deal-early-second_m3', 0.00032488956447451712),
 ('increment-price-prev8sec_m3', 0.00031602962924295966),
 ('increment-price-mv3sec_m3', 0.00031584591636114537),
 ('increment-price-prev9sec_m1', 0.00031188000996121751),
 ('increment-price-mv7sec_m2', 0.00031187938713543145),
 ('increment-price-prev11sec_m1', 0.00031083136395634642),
 ('increment-price-mv7sec_m1', 0.00030830679166693481),
 ('increment-price-prev5sec_m2', 0.00030101433915838226),
 ('d-avg-low-price_m3', 0.00029942702745887334),
 ('increment-price-prev6sec_m1', 0.00029804239135432177),
 ('increment-price-mv15sec_m1', 0.00029256194931823393),
 ('increment-price-prev1sec_m3', 0.00029190923160607408),
 ('increment-price-prev14sec_m1', 0.00029079384198733166),
 ('increment-price-mv14sec_m1', 0.00028343699920041959),
 ('increment-price-mv13sec_m1', 0.0002802398971840631),
 ('increment-price-prev11sec', 0.00027057861293708439),
 ('increment-price-prev13sec_m3', 0.00026120443349661541),
 ('increment-price-prev7sec_m1', 0.00025987705266696914),
 ('increment-price-mv12sec_m1', 0.00024905396436848823),
 ('increment-price-mv10sec_m1', 0.00024771528169111387),
 ('increment-price-prev11sec_m3', 0.00024174131360257134),
 ('increment-price-mv9sec_m1', 0.00024100501465096531),
 ('increment-price-prev4sec_m1', 0.00024003030817281484),
 ('increment-price-mv11sec_m1', 0.00023737444818538298),
 ('increment-price-prev10sec_m1', 0.00023611284162527849),
 ('increment-price-mv8sec_m1', 0.00022922817334592065),
 ('increment-price-prev9sec_m3', 0.00022235288414819011),
 ('increment-price-prev10sec_m3', 0.00021230095286648714),
 ('increment-price-prev3sec_m3', 0.00019082997651420869),
 ('increment-price-prev12sec_m3', 0.00017436188968416287),
 ('increment-price-prev5sec_m1', 0.00013995032434573082)]

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


The End