By: 顾 瞻 GU Zhan (Sam)

Sep 2017

SHL github project: uat_shl

  • training module: shl_tm

  • prediction module: shl_pm

  • simulation module: shl_sm

  • misc module: shl_mm

data feeds:

  • historical bidding price, per second, time series

  • live bidding price, per second, time series

parameter lookup table: python dictionary


In [1]:
import pandas as pd

Read raw data


In [3]:
# Load the historical bidding time series from CSV and preview its last rows.
# The preview shows the columns: ccyy-mm, time, bid-price, ref-price.
df_history_ts_process = pd.read_csv('data/history_ts.csv') 
df_history_ts_process.tail()


Out[3]:
ccyy-mm time bid-price ref-price
1886 2017-07 11:29:56 92100 89800
1887 2017-07 11:29:57 92100 89800
1888 2017-07 11:29:58 92100 89800
1889 2017-07 11:29:59 92200 89800
1890 2017-07 11:30:00 92200 89800

In [4]:
# Load the history table from CSV and preview its last rows.
# NOTE(review): the preview suggests one summary row per ccyy-mm (plate volume,
# deal prices, bidder volume) - confirm against the data file.
df_history_table_process = pd.read_csv('data/history_table.csv') 
df_history_table_process.tail()


Out[4]:
ccyy-mm volume-plate deal-price-low deal-price-avg deal-early-second volume-bidder
26 2017-03 10356 87800 87916 55 262010
27 2017-04 12196 89800 89850 59 252273
28 2017-05 10316 90100 90209 55 270197
29 2017-06 10312 89400 89532 45 244349
30 2017-07 10325 92200 92250 57 269189

In [7]:
# Seasonality-Index lookup table, keyed by month ('ccyy-mm') and second ('time').
df_parm_si = pd.read_csv('data/parm_si.csv')

# Spot-check a single lookup: SI of 2017-08 at 11:29:00.
print(
    df_parm_si.loc[
        (df_parm_si['ccyy-mm'] == '2017-08')
        & (df_parm_si['time'] == '11:29:00')
    ].iloc[0]['SI']
)

df_parm_si.tail()


0.002373838
Out[7]:
ccyy-mm time SI
2191 2017-12 11:29:56 1.0
2192 2017-12 11:29:57 1.0
2193 2017-12 11:29:58 1.0
2194 2017-12 11:29:59 1.0
2195 2017-12 11:30:00 1.0

In [80]:
# Per-month parameter table (alpha, beta, gamma, weights, direction, di).
df_parm_month = pd.read_csv('data/parm_month.csv')

# Spot-check a single lookup: dynamic increment ('di') of 2017-07 at 11:29:00.
print(
    df_parm_month.loc[
        (df_parm_month['ccyy-mm'] == '2017-07')
        & (df_parm_month['time'] == '11:29:00')
    ].iloc[0]['di']
)

df_parm_month.tail()


300
Out[80]:
ccyy-mm time alpha beta gamma sec57-weight month-weight short-weight direction di
67 2020-08 11:29:00 0.5 0.5 0.5 0.5 0.9 0.125 -1 300
68 2020-09 11:29:00 0.5 0.5 0.5 0.5 0.9 0.125 -1 300
69 2020-10 11:29:00 0.5 0.5 0.5 0.5 0.9 0.125 -1 300
70 2020-11 11:29:00 0.5 0.5 0.5 0.5 0.9 0.125 -1 300
71 2020-12 11:29:00 0.5 0.5 0.5 0.5 0.9 0.125 -1 300

Initialization


In [81]:
# function to fetch Seasonality-Index
def fetech_si(ccyy_mm, time, df_parm_si):
    """Return the Seasonality-Index (SI) for one month and one second.

    Parameters
    ----------
    ccyy_mm : value matched against the 'ccyy-mm' column (e.g. '2017-07').
    time : value matched against the 'time' column (e.g. '11:29:00').
    df_parm_si : DataFrame with 'ccyy-mm', 'time' and 'SI' columns.

    Returns
    -------
    The 'SI' value of the first row matching both keys.

    Raises
    ------
    IndexError
        If no row matches; the message names the missing key.
    """
    matches = df_parm_si[(df_parm_si['ccyy-mm'] == ccyy_mm) & (df_parm_si['time'] == time)]
    if matches.empty:
        # Same exception type as a bare .iloc[0] on an empty frame, but with a
        # message that says which lookup key was missing.
        raise IndexError('no SI entry for ccyy-mm=%r, time=%r' % (ccyy_mm, time))
    return matches.iloc[0]['SI']

In [82]:
# function to fetch Dynamic-Increment
def fetech_di(ccyy_mm, df_parm_month):
    """Return the Dynamic-Increment ('di') configured for one month.

    Parameters
    ----------
    ccyy_mm : value matched against the 'ccyy-mm' column (e.g. '2017-07').
    df_parm_month : DataFrame with 'ccyy-mm' and 'di' columns.

    Returns
    -------
    The 'di' value of the first row matching the month.

    Raises
    ------
    IndexError
        If no row matches; the message names the missing month.
    """
    matches = df_parm_month[df_parm_month['ccyy-mm'] == ccyy_mm]
    if matches.empty:
        # Same exception type as a bare .iloc[0] on an empty frame, but with a
        # message that says which month was missing.
        raise IndexError('no di entry for ccyy-mm=%r' % (ccyy_mm,))
    return matches.iloc[0]['di']

In [83]:
def get_previous_n_sec_time_as_str(df_time_field, n):
    """Return the clock time `n` seconds before `df_time_field` as a string.

    `df_time_field` is parsed with the '%H:%M:%S' format; only the time-of-day
    part of the shifted timestamp is rendered, so the result wraps around
    midnight.
    """
    parsed = pd.to_datetime(df_time_field, format='%H:%M:%S')
    step_back = pd.Timedelta(seconds=n)
    earlier = parsed - step_back
    return str(earlier.time())

In [84]:
# Global parameters for the prediction / simulation run.
global_parm_ccyy_mm = '2017-07' # which month to predict?

# create global base price
# (placeholder only: the simulation loop replaces it with the 11:29:00 bid price - 1)
global_parm_base_price = 10000000

# default set to +200
global_parm_dynamic_increment = 200

# Replace the default with the increment configured for the month being predicted.
# Look it up by global_parm_ccyy_mm rather than by a loop index, so this cell
# also runs on a fresh kernel, before any loop has defined `i`.
global_parm_dynamic_increment = fetech_di(global_parm_ccyy_mm, df_parm_month)

# Smoothing parameters for the selected month, all read from the same
# df_parm_month row (first row whose 'ccyy-mm' matches).
global_parm_month_row = df_parm_month[df_parm_month['ccyy-mm'] == global_parm_ccyy_mm].iloc[0]
global_parm_alpha = global_parm_month_row['alpha']
global_parm_beta  = global_parm_month_row['beta']
global_parm_gamma = global_parm_month_row['gamma']



print('global_parm_alpha       : %0.10f' % global_parm_alpha)
print('global_parm_beta        : %0.10f' % global_parm_beta)
print('global_parm_gamma       : %0.10f' % global_parm_gamma)


global_parm_alpha       : 0.6423321348
global_parm_beta        : 0.2302748926
global_parm_gamma       : 0.2180442895

In [ ]:


In [ ]:

Start of shl_sm


In [98]:
# Simulation driver (shl_sm): replay the historical per-second bids and run one
# prediction step (shl_pm) per record. Each step de-seasonalises the price with
# the Seasonality-Index, updates a level/trend pair by double exponential
# smoothing (alpha for the level, beta for the trend), and converts the forecast
# back into a price rounded to 100 plus the dynamic increment.
#
# Results are collected as plain dicts and turned into df_shl_pm once, after the
# loop; DataFrame.append no longer exists in pandas >= 2.0 and copied the whole
# frame on every iteration.
shl_pm_records = []        # one result dict per processed second
shl_pm_state_by_key = {}   # (ccyy-mm, time) -> (pred_les_level, pred_les_trend)

for i in range(1830, len(df_history_ts_process)): # row 1830 onwards: the 2017-07 session, used as simulation
    current_ccyy_mm = df_history_ts_process.at[i, 'ccyy-mm'] # format: ccyy-mm
    current_time = df_history_ts_process.at[i, 'time'] # format: hh:mm:ss
    current_bid = df_history_ts_process.at[i, 'bid-price'] # format: integer
    is_session_start = current_time == '11:29:00'

    print('\n<<<< Record No.: %5d >>>>' % i)
    print(current_ccyy_mm)
    print(current_time)
    print(current_bid)

    # capture & calculate 11:29:00 bid price - 1 = base price
    if is_session_start:
        global_parm_base_price = current_bid - 1
        print('#### global_parm_base_price : %d ####' % global_parm_base_price)

    print('---- Pre-Process ---')
    # pre-process: ccyy-mm hh:mm:ss label and price relative to the base price
    current_datetime = current_ccyy_mm + ' ' + current_time
    current_price4pm = current_bid - global_parm_base_price
    print('#### current_datetime   : %s ####' %  current_datetime)
    print('#### current_price4pm   : %d ####' % current_price4pm)

    # get Seasonality-Index
    current_si = fetech_si(current_ccyy_mm, current_time, df_parm_si)
    print('#### current_si         : %0.10f ####' %  current_si)
    # get de-seasoned price: price4pmsi
    current_price4pmsi = current_price4pm / current_si
    print('#### current_price4pmsi : %0.10f ####' % current_price4pmsi)

    print('---- call predicitno function shl_pm ---- %s' % current_time)
    if is_session_start:
        # A new session starts at 11:29:00: discard earlier results and seed the
        # smoothing state with the first observation and a flat trend.
        shl_pm_records = []
        shl_pm_state_by_key = {}
        current_pred_les_level = current_price4pmsi
        current_pred_les_trend = 0.0  # float, so it prints as 0.0 on the next record
    else:
        # The smoothing update needs the state of the immediately preceding second.
        previous_time = get_previous_n_sec_time_as_str(current_time, 1)
        previous_state = shl_pm_state_by_key.get((current_ccyy_mm, previous_time))
        if previous_state is None:
            raise LookupError(
                'no shl_pm state for %s %s (needed by record %d); the loop must '
                'start at an 11:29:00 record and the seconds must be contiguous'
                % (current_ccyy_mm, previous_time, i)
            )
        previous_pred_les_level, previous_pred_les_trend = previous_state
        print('     previous_pred_les_level ',  previous_pred_les_level)
        print('     previous_pred_les_trend ',  previous_pred_les_trend)

        current_pred_les_level = global_parm_alpha * current_price4pmsi \
                                    + (1 - global_parm_alpha) * (previous_pred_les_level + previous_pred_les_trend)
        print('     current_pred_les_level ',  current_pred_les_level)
        current_pred_les_trend = global_parm_beta * (current_pred_les_level - previous_pred_les_level) \
                                    + (1 - global_parm_beta) * previous_pred_les_trend
        print('     current_pred_les_trend ',  current_pred_les_trend)

    # Forecast = level + trend (+ misc, currently always 0); re-apply the
    # seasonality and the base price, then round to the nearest 100.
    current_pred_les = current_pred_les_level + current_pred_les_trend
    current_pred_les_misc = 0
    current_pred_price = (current_pred_les + current_pred_les_misc) * current_si \
                            + global_parm_base_price
    current_pred_price_rounded = round(current_pred_price/100, 0) * 100
    current_pred_dynamic_increment = global_parm_dynamic_increment
    current_pred_set_price_rounded = current_pred_price_rounded + current_pred_dynamic_increment

    # collect this second's results for the shl_pm dataframe
    df_shl_pm_current = {
        'ccyy-mm' : current_ccyy_mm,
        'time' : current_time,
        'bid' : current_bid,
        'datetime' : current_datetime,
        'price4pm' : current_price4pm,
        'SI' : current_si,
        'price4pmsi' : current_price4pmsi,
        'pred_les_level' : current_pred_les_level,
        'pred_les_trend' : current_pred_les_trend,
        'current_pred_les' : current_pred_les,
        'pred_les_misc' : current_pred_les_misc,
        'pred_price' : current_pred_price,
        'pred_price_rounded' : current_pred_price_rounded,
        'pred_dynamic_increment' : current_pred_dynamic_increment, # +200 or + 300
        'pred_set_price_rounded' : current_pred_set_price_rounded,
    }
    shl_pm_records.append(df_shl_pm_current)
    # First record wins if a timestamp repeats within a session.
    shl_pm_state_by_key.setdefault(
        (current_ccyy_mm, current_time),
        (current_pred_les_level, current_pred_les_trend),
    )

# Build the prediction results dataframe once, from all collected records.
df_shl_pm = pd.DataFrame(shl_pm_records)


<<<< Record No.:  1830 >>>>
2017-07
11:29:00
90400
#### global_parm_base_price : 90399 ####
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:00 ####
#### current_price4pm   : 1 ####
#### current_si         : 0.0023669570 ####
#### current_price4pmsi : 422.4833826724 ####
---- call predicitno function shl_pm ---- 11:29:00

<<<< Record No.:  1831 >>>>
2017-07
11:29:01
90400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:01 ####
#### current_price4pm   : 1 ####
#### current_si         : 0.0223882810 ####
#### current_price4pmsi : 44.6662251559 ####
---- call predicitno function shl_pm ---- 11:29:01
     previous_pred_les_level  422.483382672
     previous_pred_les_trend  0.0
     current_pred_les_level  179.799281333
     current_pred_les_trend  -55.8840553753

<<<< Record No.:  1832 >>>>
2017-07
11:29:02
90400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:02 ####
#### current_price4pm   : 1 ####
#### current_si         : 0.0309107700 ####
#### current_price4pmsi : 32.3511837460 ####
---- call predicitno function shl_pm ---- 11:29:02
     previous_pred_les_level  179.799281333
     previous_pred_les_trend  -55.8840553753
     current_pred_les_level  65.1006992559
     current_pred_les_trend  -69.4275641957

<<<< Record No.:  1833 >>>>
2017-07
11:29:03
90400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:03 ####
#### current_price4pm   : 1 ####
#### current_si         : 0.0377696020 ####
#### current_price4pmsi : 26.4763181778 ####
---- call predicitno function shl_pm ---- 11:29:03
     previous_pred_les_level  65.1006992559
     previous_pred_les_trend  -69.4275641957
     current_pred_les_level  15.4590094297
     current_pred_les_trend  -64.8713741

<<<< Record No.:  1834 >>>>
2017-07
11:29:04
90400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:04 ####
#### current_price4pm   : 1 ####
#### current_si         : 0.0457052300 ####
#### current_price4pmsi : 21.8793341594 ####
---- call predicitno function shl_pm ---- 11:29:04
     previous_pred_les_level  15.4590094297
     previous_pred_les_trend  -64.8713741
     current_pred_les_level  -3.61941556983
     current_pred_les_trend  -54.3264076634

<<<< Record No.:  1835 >>>>
2017-07
11:29:05
90400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:05 ####
#### current_price4pm   : 1 ####
#### current_si         : 0.0452799070 ####
#### current_price4pmsi : 22.0848510135 ####
---- call predicitno function shl_pm ---- 11:29:05
     previous_pred_les_level  -3.61941556983
     previous_pred_les_trend  -54.3264076634
     current_pred_les_level  -6.53954939742
     current_pred_les_trend  -42.4888334761

<<<< Record No.:  1836 >>>>
2017-07
11:29:06
90400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:06 ####
#### current_price4pm   : 1 ####
#### current_si         : 0.0807556680 ####
#### current_price4pmsi : 12.3830317396 ####
---- call predicitno function shl_pm ---- 11:29:06
     previous_pred_les_level  -6.53954939742
     previous_pred_les_trend  -42.4888334761
     current_pred_les_level  -9.58185782595
     current_pred_les_trend  -33.4052891567

<<<< Record No.:  1837 >>>>
2017-07
11:29:07
90400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:07 ####
#### current_price4pm   : 1 ####
#### current_si         : 0.0985017130 ####
#### current_price4pmsi : 10.1521077100 ####
---- call predicitno function shl_pm ---- 11:29:07
     previous_pred_les_level  -9.58185782595
     previous_pred_les_trend  -33.4052891567
     current_pred_les_level  -8.85409607594
     current_pred_les_trend  -25.5453045246

<<<< Record No.:  1838 >>>>
2017-07
11:29:08
90400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:08 ####
#### current_price4pm   : 1 ####
#### current_si         : 0.1361543100 ####
#### current_price4pmsi : 7.3446077469 ####
---- call predicitno function shl_pm ---- 11:29:08
     previous_pred_les_level  -8.85409607594
     previous_pred_les_trend  -25.5453045246
     current_pred_les_level  -7.58588260496
     current_pred_les_trend  -19.3708245475

<<<< Record No.:  1839 >>>>
2017-07
11:29:09
90400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:09 ####
#### current_price4pm   : 1 ####
#### current_si         : 0.2041642360 ####
#### current_price4pmsi : 4.8980174961 ####
---- call predicitno function shl_pm ---- 11:29:09
     previous_pred_les_level  -7.58588260496
     previous_pred_les_trend  -19.3708245475
     current_pred_les_level  -6.49539386654
     current_pred_les_trend  -14.6590978278

<<<< Record No.:  1840 >>>>
2017-07
11:29:10
90500
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:10 ####
#### current_price4pm   : 101 ####
#### current_si         : 0.2310771670 ####
#### current_price4pmsi : 437.0834267671 ####
---- call predicitno function shl_pm ---- 11:29:10
     previous_pred_les_level  -6.49539386654
     previous_pred_les_trend  -14.6590978278
     current_pred_les_level  273.186448702
     current_pred_les_trend  53.1202306142

<<<< Record No.:  1841 >>>>
2017-07
11:29:11
90500
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:11 ####
#### current_price4pm   : 101 ####
#### current_si         : 0.2910254840 ####
#### current_price4pmsi : 347.0486454032 ####
---- call predicitno function shl_pm ---- 11:29:11
     previous_pred_les_level  273.186448702
     previous_pred_les_trend  53.1202306142
     current_pred_les_level  339.629910672
     current_pred_les_trend  56.1882362839

<<<< Record No.:  1842 >>>>
2017-07
11:29:12
90500
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:12 ####
#### current_price4pm   : 101 ####
#### current_si         : 0.3431273480 ####
#### current_price4pmsi : 294.3513555206 ####
---- call predicitno function shl_pm ---- 11:29:12
     previous_pred_les_level  339.629910672
     previous_pred_les_trend  56.1882362839
     current_pred_les_level  330.642766205
     current_pred_les_trend  41.1799824804

<<<< Record No.:  1843 >>>>
2017-07
11:29:13
90600
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:13 ####
#### current_price4pm   : 201 ####
#### current_si         : 0.3510740950 ####
#### current_price4pmsi : 572.5287136324 ####
---- call predicitno function shl_pm ---- 11:29:13
     previous_pred_les_level  330.642766205
     previous_pred_les_trend  41.1799824804
     current_pred_les_level  500.742639611
     current_pred_les_trend  70.866996519

<<<< Record No.:  1844 >>>>
2017-07
11:29:14
90600
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:14 ####
#### current_price4pm   : 201 ####
#### current_si         : 0.3706555480 ####
#### current_price4pmsi : 542.2824535733 ####
---- call predicitno function shl_pm ---- 11:29:14
     previous_pred_les_level  500.742639611
     previous_pred_les_trend  70.866996519
     current_pred_les_level  552.771844351
     current_pred_les_trend  66.5291260402

<<<< Record No.:  1845 >>>>
2017-07
11:29:15
90600
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:15 ####
#### current_price4pm   : 201 ####
#### current_si         : 0.4011467510 ####
#### current_price4pmsi : 501.0635122905 ####
---- call predicitno function shl_pm ---- 11:29:15
     previous_pred_les_level  552.771844351
     previous_pred_les_trend  66.5291260402
     current_pred_les_level  543.35325152
     current_pred_les_trend  49.0402732327

<<<< Record No.:  1846 >>>>
2017-07
11:29:16
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:16 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.4120902590 ####
#### current_price4pmsi : 730.4225067839 ####
---- call predicitno function shl_pm ---- 11:29:16
     previous_pred_les_level  543.35325152
     previous_pred_les_trend  49.0402732327
     current_pred_les_level  681.053975441
     current_pred_les_trend  69.456548994

<<<< Record No.:  1847 >>>>
2017-07
11:29:17
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:17 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.4535685080 ####
#### current_price4pmsi : 663.6263203705 ####
---- call predicitno function shl_pm ---- 11:29:17
     previous_pred_les_level  681.053975441
     previous_pred_les_trend  69.456548994
     current_pred_les_level  694.702008161
     current_pred_les_trend  56.605248902

<<<< Record No.:  1848 >>>>
2017-07
11:29:18
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:18 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.4836754840 ####
#### current_price4pmsi : 622.3180830062 ####
---- call predicitno function shl_pm ---- 11:29:18
     previous_pred_les_level  694.702008161
     previous_pred_les_trend  56.605248902
     current_pred_les_level  668.453365529
     current_pred_les_trend  37.5260779264

<<<< Record No.:  1849 >>>>
2017-07
11:29:19
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:19 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.5045423610 ####
#### current_price4pmsi : 596.5802344196 ####
---- call predicitno function shl_pm ---- 11:29:19
     previous_pred_les_level  668.453365529
     previous_pred_les_trend  37.5260779264
     current_pred_les_level  635.708815974
     current_pred_les_trend  21.344516729

<<<< Record No.:  1850 >>>>
2017-07
11:29:20
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:20 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.5273150370 ####
#### current_price4pmsi : 570.8162651921 ####
---- call predicitno function shl_pm ---- 11:29:20
     previous_pred_les_level  635.708815974
     previous_pred_les_trend  21.344516729
     current_pred_les_level  601.660493033
     current_pred_les_trend  8.58893652236

<<<< Record No.:  1851 >>>>
2017-07
11:29:21
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:21 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.5666965740 ####
#### current_price4pmsi : 531.1484378234 ####
---- call predicitno function shl_pm ---- 11:29:21
     previous_pred_les_level  601.660493033
     previous_pred_les_trend  8.58893652236
     current_pred_les_level  559.440320674
     current_pred_les_trend  -3.1111255691

<<<< Record No.:  1852 >>>>
2017-07
11:29:22
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:22 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.5783832890 ####
#### current_price4pmsi : 520.4161422444 ####
---- call predicitno function shl_pm ---- 11:29:22
     previous_pred_les_level  559.440320674
     previous_pred_les_trend  -3.1111255691
     current_pred_les_level  533.261087195
     current_pred_les_trend  -8.42313164085

<<<< Record No.:  1853 >>>>
2017-07
11:29:23
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:23 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.5903581650 ####
#### current_price4pmsi : 509.8599762739 ####
---- call predicitno function shl_pm ---- 11:29:23
     previous_pred_les_level  533.261087195
     previous_pred_les_trend  -8.42313164085
     current_pred_les_level  515.217118149
     current_pred_les_trend  -10.6385689413

<<<< Record No.:  1854 >>>>
2017-07
11:29:24
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:24 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.6203383340 ####
#### current_price4pmsi : 485.2190869120 ####
---- call predicitno function shl_pm ---- 11:29:24
     previous_pred_les_level  515.217118149
     previous_pred_les_trend  -10.6385689413
     current_pred_les_level  492.143344463
     current_pred_les_trend  -13.5020843784

<<<< Record No.:  1855 >>>>
2017-07
11:29:25
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:25 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.6624022500 ####
#### current_price4pmsi : 454.4066690595 ####
---- call predicitno function shl_pm ---- 11:29:25
     previous_pred_les_level  492.143344463
     previous_pred_les_trend  -13.5020843784
     current_pred_les_level  463.074603496
     current_pred_les_trend  -17.0866945527

<<<< Record No.:  1856 >>>>
2017-07
11:29:26
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:26 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.6803182270 ####
#### current_price4pmsi : 442.4400053594 ####
---- call predicitno function shl_pm ---- 11:29:26
     previous_pred_les_level  463.074603496
     previous_pred_les_trend  -17.0866945527
     current_pred_les_level  443.70897646
     current_pred_les_trend  -17.6114754855

<<<< Record No.:  1857 >>>>
2017-07
11:29:27
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:27 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.7013944910 ####
#### current_price4pmsi : 429.1450871974 ####
---- call predicitno function shl_pm ---- 11:29:27
     previous_pred_les_level  443.70897646
     previous_pred_les_trend  -17.6114754855
     current_pred_les_level  428.055063539
     current_pred_les_trend  -17.1606979762

<<<< Record No.:  1858 >>>>
2017-07
11:29:28
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:28 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.7261122680 ####
#### current_price4pmsi : 414.5364474134 ####
---- call predicitno function shl_pm ---- 11:29:28
     previous_pred_les_level  428.055063539
     previous_pred_les_trend  -17.1606979762
     current_pred_les_level  413.233791773
     current_pred_les_trend  -16.621986857

<<<< Record No.:  1859 >>>>
2017-07
11:29:29
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:29 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.7412284280 ####
#### current_price4pmsi : 406.0826442021 ####
---- call predicitno function shl_pm ---- 11:29:29
     previous_pred_les_level  413.233791773
     previous_pred_les_trend  -16.621986857
     current_pred_les_level  402.695229333
     current_pred_les_trend  -15.2211269527

<<<< Record No.:  1860 >>>>
2017-07
11:29:30
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:30 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.7848751150 ####
#### current_price4pmsi : 383.5005012231 ####
---- call predicitno function shl_pm ---- 11:29:30
     previous_pred_les_level  402.695229333
     previous_pred_les_trend  -15.2211269527
     current_pred_les_level  384.921730666
     current_pred_les_trend  -15.808874075

<<<< Record No.:  1861 >>>>
2017-07
11:29:31
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:31 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.7883406290 ####
#### current_price4pmsi : 381.8146482972 ####
---- call predicitno function shl_pm ---- 11:29:31
     previous_pred_les_level  384.921730666
     previous_pred_les_trend  -15.808874075
     current_pred_les_level  377.271625573
     current_pred_les_trend  -13.9301144238

<<<< Record No.:  1862 >>>>
2017-07
11:29:32
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:32 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.8143918490 ####
#### current_price4pmsi : 369.6009486952 ####
---- call predicitno function shl_pm ---- 11:29:32
     previous_pred_les_level  377.271625573
     previous_pred_les_trend  -13.9301144238
     current_pred_les_level  367.362149031
     current_pred_les_trend  -13.0042624674

<<<< Record No.:  1863 >>>>
2017-07
11:29:33
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:33 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.8351005610 ####
#### current_price4pmsi : 360.4356338111 ####
---- call predicitno function shl_pm ---- 11:29:33
     previous_pred_les_level  367.362149031
     previous_pred_les_trend  -13.0042624674
     current_pred_les_level  358.261818927
     current_pred_les_trend  -12.1052848615

<<<< Record No.:  1864 >>>>
2017-07
11:29:34
90700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:34 ####
#### current_price4pm   : 301 ####
#### current_si         : 0.8670445380 ####
#### current_price4pmsi : 347.1563302784 ####
---- call predicitno function shl_pm ---- 11:29:34
     previous_pred_les_level  358.261818927
     previous_pred_les_trend  -12.1052848615
     current_pred_les_level  346.798735301
     current_pred_les_trend  -11.9574020409

<<<< Record No.:  1865 >>>>
2017-07
11:29:35
90800
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:35 ####
#### current_price4pm   : 401 ####
#### current_si         : 0.9216129500 ####
#### current_price4pmsi : 435.1067332550 ####
---- call predicitno function shl_pm ---- 11:29:35
     previous_pred_les_level  346.798735301
     previous_pred_les_trend  -11.9574020409
     current_pred_les_level  399.245021682
     current_pred_les_trend  2.8731503944

<<<< Record No.:  1866 >>>>
2017-07
11:29:36
90800
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:36 ####
#### current_price4pm   : 401 ####
#### current_si         : 0.9539289700 ####
#### current_price4pmsi : 420.3667281433 ####
---- call predicitno function shl_pm ---- 11:29:36
     previous_pred_les_level  399.245021682
     previous_pred_les_trend  2.8731503944
     current_pred_les_level  413.839806051
     current_pred_les_trend  5.57234839919

<<<< Record No.:  1867 >>>>
2017-07
11:29:37
90900
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:37 ####
#### current_price4pm   : 501 ####
#### current_si         : 0.9779660700 ####
#### current_price4pmsi : 512.2877115767 ####
---- call predicitno function shl_pm ---- 11:29:37
     previous_pred_les_level  413.839806051
     previous_pred_les_trend  5.57234839919
     current_pred_les_level  479.069109327
     current_pred_les_trend  19.3098472771

<<<< Record No.:  1868 >>>>
2017-07
11:29:38
91000
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:38 ####
#### current_price4pm   : 601 ####
#### current_si         : 0.9935136330 ####
#### current_price4pmsi : 604.9237574982 ####
---- call predicitno function shl_pm ---- 11:29:38
     previous_pred_les_level  479.069109327
     previous_pred_les_trend  19.3098472771
     current_pred_les_level  566.816106011
     current_pred_les_trend  35.0692045076

<<<< Record No.:  1869 >>>>
2017-07
11:29:39
91000
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:39 ####
#### current_price4pm   : 601 ####
#### current_si         : 1.0325517050 ####
#### current_price4pmsi : 582.0531766978 ####
---- call predicitno function shl_pm ---- 11:29:39
     previous_pred_les_level  566.816106011
     previous_pred_les_trend  35.0692045076
     current_pred_les_level  589.146493664
     current_pred_les_trend  32.1357748245

<<<< Record No.:  1870 >>>>
2017-07
11:29:40
91000
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:40 ####
#### current_price4pm   : 601 ####
#### current_si         : 1.0762695320 ####
#### current_price4pmsi : 558.4103072055 ####
---- call predicitno function shl_pm ---- 11:29:40
     previous_pred_les_level  589.146493664
     previous_pred_les_trend  32.1357748245
     current_pred_les_level  580.897587381
     current_pred_les_trend  22.836196719

<<<< Record No.:  1871 >>>>
2017-07
11:29:41
91000
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:41 ####
#### current_price4pm   : 601 ####
#### current_si         : 1.1032848210 ####
#### current_price4pmsi : 544.7369424110 ####
---- call predicitno function shl_pm ---- 11:29:41
     previous_pred_les_level  580.897587381
     previous_pred_les_trend  22.836196719
     current_pred_les_level  565.838216833
     current_pred_les_trend  14.1097990361

<<<< Record No.:  1872 >>>>
2017-07
11:29:42
91000
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:42 ####
#### current_price4pm   : 601 ####
#### current_si         : 1.1629896100 ####
#### current_price4pmsi : 516.7715986732 ####
---- call predicitno function shl_pm ---- 11:29:42
     previous_pred_les_level  565.838216833
     previous_pred_les_trend  14.1097990361
     current_pred_les_level  539.367772945
     current_pred_les_trend  4.76518795439

<<<< Record No.:  1873 >>>>
2017-07
11:29:43
91000
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:43 ####
#### current_price4pm   : 601 ####
#### current_si         : 1.2717913130 ####
#### current_price4pmsi : 472.5618062151 ####
---- call predicitno function shl_pm ---- 11:29:43
     previous_pred_les_level  539.367772945
     previous_pred_les_trend  4.76518795439
     current_pred_les_level  498.160508323
     current_pred_les_trend  -5.82111362579

<<<< Record No.:  1874 >>>>
2017-07
11:29:44
91100
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:44 ####
#### current_price4pm   : 701 ####
#### current_si         : 1.3866613510 ####
#### current_price4pmsi : 505.5307840624 ####
---- call predicitno function shl_pm ---- 11:29:44
     previous_pred_les_level  498.160508323
     previous_pred_les_trend  -5.82111362579
     current_pred_les_level  500.812647989
     current_pred_les_trend  -3.86993613403

<<<< Record No.:  1875 >>>>
2017-07
11:29:45
91100
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:45 ####
#### current_price4pm   : 701 ####
#### current_si         : 1.4370894140 ####
#### current_price4pmsi : 487.7914993813 ####
---- call predicitno function shl_pm ---- 11:29:45
     previous_pred_les_level  500.812647989
     previous_pred_les_trend  -3.86993613403
     current_pred_les_level  491.064594011
     current_pred_les_trend  -5.22351908928

<<<< Record No.:  1876 >>>>
2017-07
11:29:46
91200
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:46 ####
#### current_price4pm   : 801 ####
#### current_si         : 1.5686206330 ####
#### current_price4pmsi : 510.6397194764 ####
---- call predicitno function shl_pm ---- 11:29:46
     previous_pred_les_level  491.064594011
     previous_pred_les_trend  -5.22351908928
     current_pred_les_level  501.770041218
     current_pred_les_trend  -1.55547808596

<<<< Record No.:  1877 >>>>
2017-07
11:29:47
91300
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:47 ####
#### current_price4pm   : 901 ####
#### current_si         : 1.6413910300 ####
#### current_price4pmsi : 548.9246520374 ####
---- call predicitno function shl_pm ---- 11:29:47
     previous_pred_les_level  501.770041218
     previous_pred_les_trend  -1.55547808596
     current_pred_les_level  531.502618523
     current_pred_les_trend  5.64937550942

<<<< Record No.:  1878 >>>>
2017-07
11:29:48
91400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:48 ####
#### current_price4pm   : 1001 ####
#### current_si         : 1.7490712830 ####
#### current_price4pmsi : 572.3037189674 ####
---- call predicitno function shl_pm ---- 11:29:48
     previous_pred_les_level  531.502618523
     previous_pred_les_trend  5.64937550942
     current_pred_les_level  559.731076551
     current_pred_les_trend  10.8487713116

<<<< Record No.:  1879 >>>>
2017-07
11:29:49
91400
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:49 ####
#### current_price4pm   : 1001 ####
#### current_si         : 1.7897347710 ####
#### current_price4pmsi : 559.3007501557 ####
---- call predicitno function shl_pm ---- 11:29:49
     previous_pred_les_level  559.731076551
     previous_pred_les_trend  10.8487713116
     current_pred_les_level  563.334920954
     current_pred_les_trend  9.18044654583

<<<< Record No.:  1880 >>>>
2017-07
11:29:50
91500
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:50 ####
#### current_price4pm   : 1101 ####
#### current_si         : 1.9329318490 ####
#### current_price4pmsi : 569.6010444288 ####
---- call predicitno function shl_pm ---- 11:29:50
     previous_pred_les_level  563.334920954
     previous_pred_les_trend  9.18044654583
     current_pred_les_level  570.64340414
     current_pred_les_trend  8.74938038418

<<<< Record No.:  1881 >>>>
2017-07
11:29:51
91600
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:51 ####
#### current_price4pm   : 1201 ####
#### current_si         : 2.0011852710 ####
#### current_price4pmsi : 600.1443331630 ####
---- call predicitno function shl_pm ---- 11:29:51
     previous_pred_les_level  570.64340414
     previous_pred_les_trend  8.74938038418
     current_pred_les_level  592.722171061
     current_pred_les_trend  11.8188034376

<<<< Record No.:  1882 >>>>
2017-07
11:29:52
91700
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:52 ####
#### current_price4pm   : 1301 ####
#### current_si         : 2.0661036070 ####
#### current_price4pmsi : 629.6876863252 ####
---- call predicitno function shl_pm ---- 11:29:52
     previous_pred_les_level  592.722171061
     previous_pred_les_trend  11.8188034376
     current_pred_les_level  620.693515589
     current_pred_les_trend  15.5383281025

<<<< Record No.:  1883 >>>>
2017-07
11:29:53
91800
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:53 ####
#### current_price4pm   : 1401 ####
#### current_si         : 2.1682095660 ####
#### current_price4pmsi : 646.1552526883 ####
---- call predicitno function shl_pm ---- 11:29:53
     previous_pred_les_level  620.693515589
     previous_pred_les_trend  15.5383281025
     current_pred_les_level  642.605968176
     current_pred_les_trend  17.0061289339

<<<< Record No.:  1884 >>>>
2017-07
11:29:54
91900
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:54 ####
#### current_price4pm   : 1501 ####
#### current_si         : 2.2903489060 ####
#### current_price4pmsi : 655.3586643798 ####
---- call predicitno function shl_pm ---- 11:29:54
     previous_pred_les_level  642.605968176
     previous_pred_les_trend  17.0061289339
     current_pred_les_level  656.879980584
     current_pred_les_trend  16.3769910943

<<<< Record No.:  1885 >>>>
2017-07
11:29:55
92000
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:55 ####
#### current_price4pm   : 1601 ####
#### current_si         : 2.4136021070 ####
#### current_price4pmsi : 663.3239154692 ####
---- call predicitno function shl_pm ---- 11:29:55
     previous_pred_les_level  656.879980584
     previous_pred_les_trend  16.3769910943
     current_pred_les_level  666.876650479
     current_pred_les_trend  14.9077633151

<<<< Record No.:  1886 >>>>
2017-07
11:29:56
92100
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:56 ####
#### current_price4pm   : 1701 ####
#### current_si         : 2.5506970550 ####
#### current_price4pmsi : 666.8765295611 ####
---- call predicitno function shl_pm ---- 11:29:56
     previous_pred_les_level  666.876650479
     previous_pred_les_trend  14.9077633151
     current_pred_les_level  672.20860069
     current_pred_les_trend  12.7026939808

<<<< Record No.:  1887 >>>>
2017-07
11:29:57
92100
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:57 ####
#### current_price4pm   : 1701 ####
#### current_si         : 2.7053908880 ####
#### current_price4pmsi : 628.7446326314 ####
---- call predicitno function shl_pm ---- 11:29:57
     previous_pred_les_level  672.20860069
     previous_pred_les_trend  12.7026939808
     current_pred_les_level  648.83364274
     current_pred_les_trend  4.39491655677

<<<< Record No.:  1888 >>>>
2017-07
11:29:58
92100
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:58 ####
#### current_price4pm   : 1701 ####
#### current_si         : 2.7745487590 ####
#### current_price4pmsi : 613.0726643323 ####
---- call predicitno function shl_pm ---- 11:29:58
     previous_pred_les_level  648.83364274
     previous_pred_les_trend  4.39491655677
     current_pred_les_level  627.435137561
     current_pred_les_trend  -1.5446608637

<<<< Record No.:  1889 >>>>
2017-07
11:29:59
92200
---- Pre-Process ---
#### current_datetime   : 2017-07 11:29:59 ####
#### current_price4pm   : 1801 ####
#### current_si         : 2.9291830210 ####
#### current_price4pmsi : 614.8472072548 ####
---- call predicitno function shl_pm ---- 11:29:59
     previous_pred_les_level  627.435137561
     previous_pred_les_trend  -1.5446608637
     current_pred_les_level  618.797029861
     current_pred_les_trend  -3.17810357207

<<<< Record No.:  1890 >>>>
2017-07
11:30:00
92200
---- Pre-Process ---
#### current_datetime   : 2017-07 11:30:00 ####
#### current_price4pm   : 1801 ####
#### current_si         : 3.0710424510 ####
#### current_price4pmsi : 586.4458172545 ####
---- call predicitno function shl_pm ---- 11:30:00
     previous_pred_les_level  618.797029861
     previous_pred_les_trend  -3.17810357207
     current_pred_les_level  596.880100885
     current_pred_les_trend  -7.49318457974

In [99]:
df_shl_pm.head()


Out[99]:
SI bid ccyy-mm current_pred_les datetime pred_dynamic_increment pred_les_level pred_les_misc pred_les_trend pred_price pred_price_rounded pred_set_price_rounded price4pm price4pmsi time
0 0.002367 90400.0 2017-07 422.483383 2017-07 11:29:00 300.0 422.483383 0.0 0.000000 90400.000000 90400.0 90700.0 1.0 422.483383 11:29:00
1 0.022388 90400.0 2017-07 123.915226 2017-07 11:29:01 300.0 179.799281 0.0 -55.884055 90401.774249 90400.0 90700.0 1.0 44.666225 11:29:01
2 0.030911 90400.0 2017-07 -4.326865 2017-07 11:29:02 300.0 65.100699 0.0 -69.427564 90398.866253 90400.0 90700.0 1.0 32.351184 11:29:02
3 0.037770 90400.0 2017-07 -49.412365 2017-07 11:29:03 300.0 15.459009 0.0 -64.871374 90397.133715 90400.0 90700.0 1.0 26.476318 11:29:03
4 0.045705 90400.0 2017-07 -57.945823 2017-07 11:29:04 300.0 -3.619416 0.0 -54.326408 90396.351573 90400.0 90700.0 1.0 21.879334 11:29:04

In [100]:
df_shl_pm.tail()


Out[100]:
SI bid ccyy-mm current_pred_les datetime pred_dynamic_increment pred_les_level pred_les_misc pred_les_trend pred_price pred_price_rounded pred_set_price_rounded price4pm price4pmsi time
56 2.550697 92100.0 2017-07 684.911295 2017-07 11:29:56 300.0 672.208601 0.0 12.702694 92146.001222 92100.0 92400.0 1701.0 666.876530 11:29:56
57 2.705391 92100.0 2017-07 653.228559 2017-07 11:29:57 300.0 648.833643 0.0 4.394917 92166.238592 92200.0 92500.0 1701.0 628.744633 11:29:57
58 2.774549 92100.0 2017-07 625.890477 2017-07 11:29:58 300.0 627.435138 0.0 -1.544661 92135.563645 92100.0 92400.0 1701.0 613.072664 11:29:58
59 2.929183 92200.0 2017-07 615.618926 2017-07 11:29:59 300.0 618.797030 0.0 -3.178104 92202.260506 92200.0 92500.0 1801.0 614.847207 11:29:59
60 3.071042 92200.0 2017-07 589.386916 2017-07 11:30:00 300.0 596.880101 0.0 -7.493185 92209.032240 92200.0 92500.0 1801.0 586.445817 11:30:00

In [101]:
df_shl_pm


Out[101]:
SI bid ccyy-mm current_pred_les datetime pred_dynamic_increment pred_les_level pred_les_misc pred_les_trend pred_price pred_price_rounded pred_set_price_rounded price4pm price4pmsi time
0 0.002367 90400.0 2017-07 422.483383 2017-07 11:29:00 300.0 422.483383 0.0 0.000000 90400.000000 90400.0 90700.0 1.0 422.483383 11:29:00
1 0.022388 90400.0 2017-07 123.915226 2017-07 11:29:01 300.0 179.799281 0.0 -55.884055 90401.774249 90400.0 90700.0 1.0 44.666225 11:29:01
2 0.030911 90400.0 2017-07 -4.326865 2017-07 11:29:02 300.0 65.100699 0.0 -69.427564 90398.866253 90400.0 90700.0 1.0 32.351184 11:29:02
3 0.037770 90400.0 2017-07 -49.412365 2017-07 11:29:03 300.0 15.459009 0.0 -64.871374 90397.133715 90400.0 90700.0 1.0 26.476318 11:29:03
4 0.045705 90400.0 2017-07 -57.945823 2017-07 11:29:04 300.0 -3.619416 0.0 -54.326408 90396.351573 90400.0 90700.0 1.0 21.879334 11:29:04
5 0.045280 90400.0 2017-07 -49.028383 2017-07 11:29:05 300.0 -6.539549 0.0 -42.488833 90396.779999 90400.0 90700.0 1.0 22.084851 11:29:05
6 0.080756 90400.0 2017-07 -42.987147 2017-07 11:29:06 300.0 -9.581858 0.0 -33.405289 90395.528544 90400.0 90700.0 1.0 12.383032 11:29:06
7 0.098502 90400.0 2017-07 -34.399401 2017-07 11:29:07 300.0 -8.854096 0.0 -25.545305 90395.611600 90400.0 90700.0 1.0 10.152108 11:29:07
8 0.136154 90400.0 2017-07 -26.956707 2017-07 11:29:08 300.0 -7.585883 0.0 -19.370825 90395.329728 90400.0 90700.0 1.0 7.344608 11:29:08
9 0.204164 90400.0 2017-07 -21.154492 2017-07 11:29:09 300.0 -6.495394 0.0 -14.659098 90394.681009 90400.0 90700.0 1.0 4.898017 11:29:09
10 0.231077 90500.0 2017-07 326.306679 2017-07 11:29:10 300.0 273.186449 0.0 53.120231 90474.402023 90500.0 90800.0 101.0 437.083427 11:29:10
11 0.291025 90500.0 2017-07 395.818147 2017-07 11:29:11 300.0 339.629911 0.0 56.188236 90514.193168 90500.0 90800.0 101.0 347.048645 11:29:11
12 0.343127 90500.0 2017-07 371.822749 2017-07 11:29:12 300.0 330.642766 0.0 41.179982 90526.582554 90500.0 90800.0 101.0 294.351356 11:29:12
13 0.351074 90600.0 2017-07 571.609636 2017-07 11:29:13 300.0 500.742640 0.0 70.866997 90599.677336 90600.0 90900.0 201.0 572.528714 11:29:13
14 0.370656 90600.0 2017-07 619.300970 2017-07 11:29:14 300.0 552.771844 0.0 66.529126 90628.547341 90600.0 90900.0 201.0 542.282454 11:29:14
15 0.401147 90600.0 2017-07 592.393525 2017-07 11:29:15 300.0 543.353252 0.0 49.040273 90636.636738 90600.0 90900.0 201.0 501.063512 11:29:15
16 0.412090 90700.0 2017-07 750.510524 2017-07 11:29:16 300.0 681.053975 0.0 69.456549 90708.278076 90700.0 91000.0 301.0 730.422507 11:29:16
17 0.453569 90700.0 2017-07 751.307257 2017-07 11:29:17 300.0 694.702008 0.0 56.605249 90739.769312 90700.0 91000.0 301.0 663.626320 11:29:17
18 0.483675 90700.0 2017-07 705.979443 2017-07 11:29:18 300.0 668.453366 0.0 37.526078 90740.464949 90700.0 91000.0 301.0 622.318083 11:29:18
19 0.504542 90700.0 2017-07 657.053333 2017-07 11:29:19 300.0 635.708816 0.0 21.344517 90730.511240 90700.0 91000.0 301.0 596.580234 11:29:19
20 0.527315 90700.0 2017-07 610.249430 2017-07 11:29:20 300.0 601.660493 0.0 8.588937 90720.793701 90700.0 91000.0 301.0 570.816265 11:29:20
21 0.566697 90700.0 2017-07 556.329195 2017-07 11:29:21 300.0 559.440321 0.0 -3.111126 90714.269849 90700.0 91000.0 301.0 531.148438 11:29:21
22 0.578383 90700.0 2017-07 524.837956 2017-07 11:29:22 300.0 533.261087 0.0 -8.423132 90702.557503 90700.0 91000.0 301.0 520.416142 11:29:22
23 0.590358 90700.0 2017-07 504.578549 2017-07 11:29:23 300.0 515.217118 0.0 -10.638569 90696.882066 90700.0 91000.0 301.0 509.859976 11:29:23
24 0.620338 90700.0 2017-07 478.641260 2017-07 11:29:24 300.0 492.143344 0.0 -13.502084 90695.919522 90700.0 91000.0 301.0 485.219087 11:29:24
25 0.662402 90700.0 2017-07 445.987909 2017-07 11:29:25 300.0 463.074603 0.0 -17.086695 90694.423394 90700.0 91000.0 301.0 454.406669 11:29:25
26 0.680318 90700.0 2017-07 426.097501 2017-07 11:29:26 300.0 443.708976 0.0 -17.611475 90688.881896 90700.0 91000.0 301.0 442.440005 11:29:26
27 0.701394 90700.0 2017-07 410.894366 2017-07 11:29:27 300.0 428.055064 0.0 -17.160698 90687.199044 90700.0 91000.0 301.0 429.145087 11:29:27
28 0.726112 90700.0 2017-07 396.611805 2017-07 11:29:28 300.0 413.233792 0.0 -16.621987 90686.984697 90700.0 91000.0 301.0 414.536447 11:29:28
29 0.741228 90700.0 2017-07 387.474102 2017-07 11:29:29 300.0 402.695229 0.0 -15.221127 90686.206820 90700.0 91000.0 301.0 406.082644 11:29:29
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
31 0.788341 90700.0 2017-07 363.341511 2017-07 11:29:31 300.0 377.271626 0.0 -13.930114 90685.436875 90700.0 91000.0 301.0 381.814648 11:29:31
32 0.814392 90700.0 2017-07 354.357887 2017-07 11:29:32 300.0 367.362149 0.0 -13.004262 90687.586174 90700.0 91000.0 301.0 369.600949 11:29:32
33 0.835101 90700.0 2017-07 346.156534 2017-07 11:29:33 300.0 358.261819 0.0 -12.105285 90688.075516 90700.0 91000.0 301.0 360.435634 11:29:33
34 0.867045 90700.0 2017-07 334.841333 2017-07 11:29:34 300.0 346.798735 0.0 -11.957402 90689.322349 90700.0 91000.0 301.0 347.156330 11:29:34
35 0.921613 90800.0 2017-07 402.118172 2017-07 11:29:35 300.0 399.245022 0.0 2.873150 90769.597315 90800.0 91100.0 401.0 435.106733 11:29:35
36 0.953929 90800.0 2017-07 419.412154 2017-07 11:29:36 300.0 413.839806 0.0 5.572348 90799.089405 90800.0 91100.0 401.0 420.366728 11:29:36
37 0.977966 90900.0 2017-07 498.378957 2017-07 11:29:37 300.0 479.069109 0.0 19.309847 90886.397710 90900.0 91200.0 501.0 512.287712 11:29:37
38 0.993514 91000.0 2017-07 601.885311 2017-07 11:29:38 300.0 566.816106 0.0 35.069205 90996.981262 91000.0 91300.0 601.0 604.923757 11:29:38
39 1.032552 91000.0 2017-07 621.282268 2017-07 11:29:39 300.0 589.146494 0.0 32.135775 91040.506066 91000.0 91300.0 601.0 582.053177 11:29:39
40 1.076270 91000.0 2017-07 603.733784 2017-07 11:29:40 300.0 580.897587 0.0 22.836197 91048.780277 91000.0 91300.0 601.0 558.410307 11:29:40
41 1.103285 91000.0 2017-07 579.948016 2017-07 11:29:41 300.0 565.838217 0.0 14.109799 91038.847843 91000.0 91300.0 601.0 544.736942 11:29:41
42 1.162990 91000.0 2017-07 544.132961 2017-07 11:29:42 300.0 539.367773 0.0 4.765188 91031.820980 91000.0 91300.0 601.0 516.771599 11:29:42
43 1.271791 91000.0 2017-07 492.339395 2017-07 11:29:43 300.0 498.160508 0.0 -5.821114 91025.152965 91000.0 91300.0 601.0 472.561806 11:29:43
44 1.386661 91100.0 2017-07 496.942712 2017-07 11:29:44 300.0 500.812648 0.0 -3.869936 91088.091252 91100.0 91400.0 701.0 505.530784 11:29:44
45 1.437089 91100.0 2017-07 485.841075 2017-07 11:29:45 300.0 491.064594 0.0 -5.223519 91097.197066 91100.0 91400.0 701.0 487.791499 11:29:45
46 1.568621 91200.0 2017-07 500.214563 2017-07 11:29:46 300.0 501.770041 0.0 -1.555478 91183.646885 91200.0 91500.0 801.0 510.639719 11:29:46
47 1.641391 91300.0 2017-07 537.151994 2017-07 11:29:47 300.0 531.502619 0.0 5.649376 91280.676465 91300.0 91600.0 901.0 548.924652 11:29:47
48 1.749071 91400.0 2017-07 570.579848 2017-07 11:29:48 300.0 559.731077 0.0 10.848771 91396.984827 91400.0 91700.0 1001.0 572.303719 11:29:48
49 1.789735 91400.0 2017-07 572.515368 2017-07 11:29:49 300.0 563.334921 0.0 9.180447 91423.650660 91400.0 91700.0 1001.0 559.300750 11:29:49
50 1.932932 91500.0 2017-07 579.392785 2017-07 11:29:50 300.0 570.643404 0.0 8.749380 91518.926766 91500.0 91800.0 1101.0 569.601044 11:29:50
51 2.001185 91600.0 2017-07 604.540974 2017-07 11:29:51 300.0 592.722171 0.0 11.818803 91608.798494 91600.0 91900.0 1201.0 600.144333 11:29:51
52 2.066104 91700.0 2017-07 636.231844 2017-07 11:29:52 300.0 620.693516 0.0 15.538328 91713.520907 91700.0 92000.0 1301.0 629.687686 11:29:52
53 2.168210 91800.0 2017-07 659.612097 2017-07 11:29:53 300.0 642.605968 0.0 17.006129 91829.177259 91800.0 92100.0 1401.0 646.155253 11:29:53
54 2.290349 91900.0 2017-07 673.256972 2017-07 11:29:54 300.0 656.879981 0.0 16.376991 91940.993369 91900.0 92200.0 1501.0 655.358664 11:29:54
55 2.413602 92000.0 2017-07 681.784414 2017-07 11:29:55 300.0 666.876650 0.0 14.907763 92044.556298 92000.0 92300.0 1601.0 663.323915 11:29:55
56 2.550697 92100.0 2017-07 684.911295 2017-07 11:29:56 300.0 672.208601 0.0 12.702694 92146.001222 92100.0 92400.0 1701.0 666.876530 11:29:56
57 2.705391 92100.0 2017-07 653.228559 2017-07 11:29:57 300.0 648.833643 0.0 4.394917 92166.238592 92200.0 92500.0 1701.0 628.744633 11:29:57
58 2.774549 92100.0 2017-07 625.890477 2017-07 11:29:58 300.0 627.435138 0.0 -1.544661 92135.563645 92100.0 92400.0 1701.0 613.072664 11:29:58
59 2.929183 92200.0 2017-07 615.618926 2017-07 11:29:59 300.0 618.797030 0.0 -3.178104 92202.260506 92200.0 92500.0 1801.0 614.847207 11:29:59
60 3.071042 92200.0 2017-07 589.386916 2017-07 11:30:00 300.0 596.880101 0.0 -7.493185 92209.032240 92200.0 92500.0 1801.0 586.445817 11:30:00

61 rows × 15 columns


In [96]:
%matplotlib inline
import matplotlib.pyplot as plt

In [105]:
# Compare the actual bid price with the predicted price for every row of
# df_shl_pm. Labels and a legend are added so the two lines can be told apart.
plt.figure(figsize=(18,10))
plt.plot(df_shl_pm['bid'], label='bid')
plt.plot(df_shl_pm['pred_price'], label='pred_price')
plt.xlabel('row index of df_shl_pm')
plt.ylabel('price')
plt.title('Actual bid vs. predicted price')
plt.legend()


Out[105]:
[<matplotlib.lines.Line2D at 0x7fe9792c4eb8>]

In [ ]:


In [ ]:

End of shl_sm


In [ ]:
# TODO: create global base price (this cell does not implement it yet)

# Create the prediction results dataframe (shl_pm); it starts out empty.
df_shl_pm = pd.DataFrame()

In [ ]:
# append into prediction results dataframe: shl_pm

In [ ]:


In [ ]:
df_shl_pm = pd.DataFrame()

In [ ]:
# Sample record built from history row 1830, with a hard-coded 'bid' of 1.8.
d = {
    'ccyy-mm': df_history_ts_process.loc[1830, 'ccyy-mm'],
    'time': df_history_ts_process.loc[1830, 'time'],
    'bid': 1.8,
}

In [ ]:
# Append the record `d` as a new row. DataFrame.append was removed in
# pandas 2.0, so the row is wrapped in a one-row frame and concatenated.
df_shl_pm = pd.concat([df_shl_pm, pd.DataFrame([d])], ignore_index=True)

In [ ]:
df_shl_pm

In [ ]:


In [ ]:


In [ ]:

Start of prediction module: shl_pm


In [ ]:
def

In [ ]:
# TODO: create global base price (this cell does not implement it yet)

# Create the prediction results dataframe (shl_pm); it starts out empty.
df_shl_pm = pd.DataFrame()

In [ ]:
# append into prediction results dataframe: shl_pm

In [ ]:


In [ ]:
df_shl_pm = pd.DataFrame()

In [ ]:
# Sample record built from history row 1830, with a hard-coded 'bid' of 1.8.
d = {
    'ccyy-mm': df_history_ts_process.loc[1830, 'ccyy-mm'],
    'time': df_history_ts_process.loc[1830, 'time'],
    'bid': 1.8,
}

In [ ]:
# Append the record `d` as a new row. DataFrame.append was removed in
# pandas 2.0, so the row is wrapped in a one-row frame and concatenated.
df_shl_pm = pd.concat([df_shl_pm, pd.DataFrame([d])], ignore_index=True)

In [ ]:
df_shl_pm

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:

End of prediction module: shl_pm


In [ ]:

[1] Import useful reference packages


In [ ]:
# from __future__ import print_function, division
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import pandas as pd
import operator
from scipy import interp
from itertools import cycle
from sklearn import svm
from sklearn.utils.validation import check_random_state
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import roc_curve, auc
from statsmodels.graphics.mosaicplot import mosaic
print(__doc__)

[2] Data pre-processing

Explore and visualize data

Parameters


In [ ]:
# ---- Tunable parameters ----
# Second within each month's series whose bid-price is used as the base-price
# (later used to derive increment-price).
parm_calculate_base_price_second = 15
# How many seconds into the future to predict (target variable).
parm_calculate_target_second = 7
# Number of previous price/increment values to include (previous 1sec ... Nsec).
parm_calculate_prev_bp = 15
# Largest moving-average window over previous price/increment (2sec ... Nsec).
parm_calculate_mv = 15
# Number of previous months to include (the earliest months must later be
# removed from the training data).
parm_calculate_prev_month = 3

# ---- Derived values ----
parm_ts_cycle = 61 # seconds/records per month
parm_ts_month = len(df_history_ts_process) // parm_ts_cycle

# Rows/months that have to be cut because their lag or target data is incomplete.
parm_record_cut_row_head = max(parm_calculate_base_price_second, parm_calculate_prev_bp, parm_calculate_mv)
parm_record_cut_row_tail = parm_calculate_target_second
parm_record_cut_month_head = parm_calculate_prev_month + 1

parm_ts_valid_cycle = parm_ts_cycle - parm_record_cut_row_head - parm_record_cut_row_tail
parm_ts_valid_month = parm_ts_month - parm_record_cut_month_head

# Cut-off month in 2015, with the month number zero-padded to two digits.
parm_record_cut_ccyy = pd.to_datetime('2015-%02d' % parm_record_cut_month_head)

# ---- Report ----
print('parm_calculate_base_price_second : %3d seconds' % parm_calculate_base_price_second)
print('parm_calculate_target_second     : %3d seconds' % parm_calculate_target_second)
print('parm_calculate_prev_bp           : %3d seconds' % parm_calculate_prev_bp)
print('parm_calculate_mv                : %3d seconds' % parm_calculate_mv)
print('parm_calculate_prev_month        : %3d months' % parm_calculate_prev_month)

print()
print('parm_ts_cycle                    : %3d seconds' % parm_ts_cycle)
print('parm_ts_month                    : %3d months' %  parm_ts_month)
print('parm_ts_valid_cycle              : %3d seconds' % parm_ts_valid_cycle)
print('parm_ts_valid_month              : %3d months' % parm_ts_valid_month)

print()
print('parm_record_cut_ccyy             : %s' % parm_record_cut_ccyy)

print('parm_record_cut_month_head       : %3d months' % parm_record_cut_month_head)
print('parm_record_cut_row_head         : %3d seconds' % parm_record_cut_row_head)
print('parm_record_cut_row_tail         : %3d seconds' % parm_record_cut_row_tail)
print()

In [ ]:
df_history_ts_process.head()

In [ ]:

Prepare derived features

Process: df_history_ts_process


In [ ]:
# Derive calendar and clock features from the 'ccyy-mm' and 'time' string
# columns. Vectorised accessors replace the original row-by-row
# apply(axis=1) calls; the resulting values are the same.

# date of current month (parsed from 'ccyy-mm')
df_history_ts_process['date-curr'] = pd.to_datetime(df_history_ts_process['ccyy-mm'])

# date of previous month
df_history_ts_process['date-prev'] = df_history_ts_process['date-curr'] - pd.offsets.MonthBegin(1)


# Year and Month: characters 0-3 and 5-6 of 'ccyy-mm'
df_history_ts_process['year'] = df_history_ts_process['ccyy-mm'].str[0:4]
df_history_ts_process['month'] = df_history_ts_process['ccyy-mm'].str[5:7]

# Hour, Minute, Second: characters 0-1, 3-4 and 6-7 of 'time'
df_history_ts_process['hour'] = df_history_ts_process['time'].str[0:2]
df_history_ts_process['minute'] = df_history_ts_process['time'].str[3:5]
df_history_ts_process['second'] = df_history_ts_process['time'].str[6:8]


# String keys later used to join a record to the same second of another month.
# The format reproduces str(Timestamp) for a midnight timestamp, so the keys
# match the ones the original str(row['date-curr']) produced.
timestamp_text_format = '%Y-%m-%d %H:%M:%S'

# datetime of current month
df_history_ts_process['datetime-curr'] = (
    df_history_ts_process['date-curr'].dt.strftime(timestamp_text_format)
    + ' ' + df_history_ts_process['time']
)

# datetime of previous month
df_history_ts_process['datetime-prev'] = (
    df_history_ts_process['date-prev'].dt.strftime(timestamp_text_format)
    + ' ' + df_history_ts_process['time']
)

In [ ]:
df_history_ts_process.tail()

In [ ]:
# df_history_ts_process
# df_history_ts_process[1768:]

In [ ]:
# new ['base-price']: every record of a month gets that month's bid-price
# observed at row offset `parm_calculate_base_price_second`.
col_name = 'base-price'+str(parm_calculate_base_price_second)+'sec'
col_name_base_price = col_name
print('Creating : ', col_name)

# Collect all values first and build the frame once; growing an empty frame
# one .loc row at a time re-allocates on every insert.
bid_price = df_history_ts_process['bid-price']
base_prices = [
    bid_price[month*parm_ts_cycle+parm_calculate_base_price_second]
    for month in range(0, parm_ts_month)
    for i in range(0, parm_ts_cycle)
]
col_data = pd.DataFrame({col_name: base_prices})

# Assignment aligns on the index, so rows beyond the last full month (if any) stay NaN.
df_history_ts_process[col_name] = col_data[col_name]

print('Total records processed : ', len(col_data))

In [ ]:
# df_history_ts_process
# df_history_ts_process[1768:]

In [ ]:
# new ['increment-price'] = ['bid-price'] - ['base-price']
# Plain column arithmetic instead of a row-by-row apply. pd.to_numeric keeps
# the result numeric even if the base-price column was stored as object dtype.
df_history_ts_process['increment-price'] = (
    df_history_ts_process['bid-price']
    - pd.to_numeric(df_history_ts_process[col_name_base_price])
)

In [ ]:
# df_history_ts_process
# df_history_ts_process[1768:]

In [ ]:
# Figure 1: bid-price together with the per-month base-price.
plt.figure()
for plotted_column in ('bid-price', col_name_base_price):
    plt.plot(df_history_ts_process[plotted_column])
plt.plot()

# Figure 2: the derived increment-price.
plt.figure()
increment_series = df_history_ts_process['increment-price']
plt.plot(increment_series)
plt.plot()

['increment-price-target']


In [ ]:
# ['increment-price-target']: the increment-price observed
# `parm_calculate_target_second` rows AFTER the current row, within the same
# month. The last `parm_calculate_target_second` rows of each month have no
# such future value and are set to 0.
# Assumes 0 <= parm_calculate_target_second <= parm_ts_cycle.
col_name = 'increment-price-target'
print('Creating : ', col_name)

increment_price = df_history_ts_process['increment-price']
rows_with_target = parm_ts_cycle - parm_calculate_target_second

# Collect all values first and build the frame once, instead of growing an
# empty frame one .loc row at a time.
target_values = []
for month in range(0, parm_ts_month):
    month_start = month*parm_ts_cycle
    target_values.extend(
        increment_price[month_start+i+parm_calculate_target_second]
        for i in range(0, rows_with_target)
    )
    target_values.extend([0] * parm_calculate_target_second)

col_data = pd.DataFrame({col_name: target_values})

# Assignment aligns on the index, so rows beyond the last full month (if any) stay NaN.
df_history_ts_process[col_name] = col_data[col_name]

print('Total records processed : ', len(col_data))

In [ ]:
compared_columns = ('increment-price', 'increment-price-target')

# Figure 1: full history.
plt.figure()
for plotted_column in compared_columns:
    plt.plot(df_history_ts_process[plotted_column])
plt.plot()

# Figure 2: rows from 1768 onwards only.
plt.figure()
for plotted_column in compared_columns:
    plt.plot(df_history_ts_process[plotted_column][1768:])
plt.plot()

In [ ]:


In [ ]:
# previous 'parm_calculate_prev_bp' sec ['increment-price']
# For each lag `gap` in 1..parm_calculate_prev_bp, create the column
# 'increment-price-prev<gap>sec' holding the increment-price observed `gap`
# rows earlier within the same month. The first `gap` rows of every month
# have no such earlier value and are set to 0.
increment_price = df_history_ts_process['increment-price']

for gap in range(1, parm_calculate_prev_bp+1):
    col_name = 'increment-price-prev'+str(gap)+'sec'
    print('Creating : ', col_name)

    # Collect all values first and build the frame once, instead of growing
    # an empty frame one .loc row at a time.
    lagged_values = []
    for month in range(0, parm_ts_month):
        month_start = month*parm_ts_cycle
        # min() keeps every month exactly parm_ts_cycle rows long even for an oversized gap.
        lagged_values.extend([0] * min(gap, parm_ts_cycle))
        lagged_values.extend(
            increment_price[month_start+i-gap] for i in range(gap, parm_ts_cycle)
        )

    col_data = pd.DataFrame({col_name: lagged_values})

    # Assignment aligns on the index, so rows beyond the last full month (if any) stay NaN.
    df_history_ts_process[col_name] = col_data[col_name]

print('Total records processed : ', len(col_data))

In [ ]:
# previous 'parm_calculate_mv' sec Moving Average ['increment-price']
# For each window `gap` in 2..parm_calculate_mv, create the column
# 'increment-price-mv<gap>sec' holding the mean of the `gap` increment-price
# values that PRECEDE the current row (the current row is excluded) within
# the same month. The first `gap` rows of every month are set to 0.
increment_price = df_history_ts_process['increment-price']

for gap in range(2, parm_calculate_mv+1): # MV starts from 2 seconds, till parm_calculate_mv
    col_name = 'increment-price-mv'+str(gap)+'sec'
    print('Creating : ', col_name)

    # Collect all values first and build the frame once, instead of growing
    # an empty frame one .loc row at a time.
    moving_averages = []
    for month in range(0, parm_ts_month):
        month_start = month*parm_ts_cycle
        # min() keeps every month exactly parm_ts_cycle rows long even for an oversized gap.
        moving_averages.extend([0] * min(gap, parm_ts_cycle))
        moving_averages.extend(
            np.mean(increment_price[month_start+i-gap:month_start+i])
            for i in range(gap, parm_ts_cycle)
        )

    col_data = pd.DataFrame({col_name: moving_averages})

    # Assignment aligns on the index, so rows beyond the last full month (if any) stay NaN.
    df_history_ts_process[col_name] = col_data[col_name]

print('Total records processed : ', len(col_data))

In [ ]:
# df_history_ts_process[1768:]

In [ ]:
# Overlay increment-price with a selection of its lagged copies (rows from 1768 onwards).
lag_columns_to_plot = [
    'increment-price',
    'increment-price-prev3sec',
    'increment-price-prev7sec',
    'increment-price-prev11sec',
    'increment-price-prev15sec',
]

plt.figure()
for plotted_column in lag_columns_to_plot:
    plt.plot(df_history_ts_process[plotted_column][1768:])
plt.plot()

In [ ]:
# Overlay increment-price with a selection of its moving averages (rows from 1768 onwards).
mv_columns_to_plot = [
    'increment-price',
    'increment-price-mv3sec',
    'increment-price-mv7sec',
    'increment-price-mv11sec',
    'increment-price-mv15sec',
]

plt.figure()
for plotted_column in mv_columns_to_plot:
    plt.plot(df_history_ts_process[plotted_column][1768:])
plt.plot()

In [ ]:

Process: df_history_table_process


In [ ]:
df_history_table_process.tail()

In [ ]:
# Derived monthly features, computed with whole-column operations instead of
# row-by-row apply(axis=1).

# date of current month (parsed from 'ccyy-mm')
df_history_table_process['date-curr'] = pd.to_datetime(df_history_table_process['ccyy-mm'])

# spread between the average and the lowest deal price
df_history_table_process['d-avg-low-price'] = (
    df_history_table_process['deal-price-avg'] - df_history_table_process['deal-price-low']
)

# 'volume-plate' divided by 'volume-bidder'
df_history_table_process['ratio-bid'] = (
    df_history_table_process['volume-plate'] / df_history_table_process['volume-bidder']
)

In [ ]:
df_history_table_process.tail()

Merge dataframe


In [ ]:
df_history_ts_process_tmp2 = df_history_ts_process.copy()

In [ ]:
df_history_ts_process = df_history_ts_process_tmp2.copy()

In [ ]:
# look up current month table data: 'volume-plate', 'ratio-bid'
# Left join, so every time-series record is kept.
table_current_month = df_history_table_process[['date-curr', 'volume-plate', 'ratio-bid']]
df_history_ts_process = df_history_ts_process.merge(
    table_current_month,
    how='left',
    left_on='date-curr',
    right_on='date-curr',
    suffixes=['', '_table'],
)

In [ ]:
# List every column name, one per line.
for column_name in df_history_ts_process.columns:
    print(column_name)

In [ ]:
# look up previous month table data: 'volume-plate', 'ratio-bid', 'deal-early-second', 'deal-price-avg', 'd-avg-low-price'
# The record's 'date-prev' is matched against the table's 'date-curr';
# right-hand columns whose names already exist get the '_m0' suffix.
table_previous_month = df_history_table_process[[
    'date-curr',
    'volume-plate',
    'ratio-bid',
    'deal-early-second',
    'deal-price-avg',
    'd-avg-low-price',
]]
df_history_ts_process = df_history_ts_process.merge(
    table_previous_month,
    how='left',
    left_on='date-prev',
    right_on='date-curr',
    suffixes=['', '_m0'],
)

In [ ]:
# Difference between the record's increment-price and the 'd-avg-low-price'
# column, computed with column arithmetic instead of a row-by-row apply.
df_history_ts_process['d-increment-avg-low-price_m0'] = (
    df_history_ts_process['increment-price'] - df_history_ts_process['d-avg-low-price']
)

In [ ]:
# List every column name, one per line.
for column_name in df_history_ts_process.columns:
    print(column_name)

Shift to copy previous 'parm_calculate_prev_month' month's data into current row


In [ ]:
# df_history_ts_process = df_history_ts_process_lookup.copy()

In [ ]:
df_history_ts_process_lookup = df_history_ts_process.copy()
df_history_ts_process_lookup.tail()

In [ ]:
# _m1: attach to every record the features of the same second one month
# earlier (the record's 'datetime-prev' matched against the lookup's
# 'datetime-curr'). Right-hand columns whose names already exist get the
# '_m1' suffix.
#
# The column list is derived from the parameters that created the feature
# columns, so it stays valid when they change. With base second 15 and
# 15 / 15 lag and moving-average settings it is identical to the former
# hard-coded list.
lookup_columns = (
    ['datetime-curr', 'datetime-prev',
     col_name_base_price, 'increment-price', 'increment-price-target']
    + ['increment-price-prev'+str(sec)+'sec' for sec in range(1, parm_calculate_prev_bp+1)]
    + ['increment-price-mv'+str(sec)+'sec' for sec in range(2, parm_calculate_mv+1)]
    + ['volume-plate_m0',
       'ratio-bid_m0',
       'deal-early-second',
       'deal-price-avg',
       'd-avg-low-price',
       'd-increment-avg-low-price_m0']
)
df_history_ts_process = pd.merge(
    df_history_ts_process, df_history_ts_process_lookup[lookup_columns],
    how = 'left', left_on = 'datetime-prev', right_on = 'datetime-curr', suffixes=['', '_m1'])
df_history_ts_process.tail()

In [ ]:
# _m2: attach to every record the features of the same second two months
# earlier ('datetime-prev_m1' matched against the lookup's 'datetime-curr').
# Right-hand columns whose names already exist get the '_m2' suffix.
#
# The column list is derived from the parameters that created the feature
# columns, so it stays valid when they change. With base second 15 and
# 15 / 15 lag and moving-average settings it is identical to the former
# hard-coded list.
lookup_columns = (
    ['datetime-curr', 'datetime-prev',
     col_name_base_price, 'increment-price', 'increment-price-target']
    + ['increment-price-prev'+str(sec)+'sec' for sec in range(1, parm_calculate_prev_bp+1)]
    + ['increment-price-mv'+str(sec)+'sec' for sec in range(2, parm_calculate_mv+1)]
    + ['volume-plate_m0',
       'ratio-bid_m0',
       'deal-early-second',
       'deal-price-avg',
       'd-avg-low-price',
       'd-increment-avg-low-price_m0']
)
df_history_ts_process = pd.merge(
    df_history_ts_process, df_history_ts_process_lookup[lookup_columns],
    how = 'left', left_on = 'datetime-prev_m1', right_on = 'datetime-curr', suffixes=['', '_m2'])
df_history_ts_process.tail()

In [ ]:
# _m3: left-join the lookup frame onto the working frame, matching this row's
# 'datetime-prev_m2' against the lookup's 'datetime-curr'.  Columns that exist
# on both sides keep their name on the left and get '_m3' appended on the right.
df_history_ts_process = df_history_ts_process.merge(
    df_history_ts_process_lookup[[
        # join keys
        'datetime-curr', 'datetime-prev',
        # base price / increment / target
        'base-price15sec', 'increment-price', 'increment-price-target',
        # 'increment-price-prevNsec', N = 1..15
        'increment-price-prev1sec', 'increment-price-prev2sec', 'increment-price-prev3sec',
        'increment-price-prev4sec', 'increment-price-prev5sec', 'increment-price-prev6sec',
        'increment-price-prev7sec', 'increment-price-prev8sec', 'increment-price-prev9sec',
        'increment-price-prev10sec', 'increment-price-prev11sec', 'increment-price-prev12sec',
        'increment-price-prev13sec', 'increment-price-prev14sec', 'increment-price-prev15sec',
        # 'increment-price-mvNsec', N = 2..15
        'increment-price-mv2sec', 'increment-price-mv3sec', 'increment-price-mv4sec',
        'increment-price-mv5sec', 'increment-price-mv6sec', 'increment-price-mv7sec',
        'increment-price-mv8sec', 'increment-price-mv9sec', 'increment-price-mv10sec',
        'increment-price-mv11sec', 'increment-price-mv12sec', 'increment-price-mv13sec',
        'increment-price-mv14sec', 'increment-price-mv15sec',
        # month-level columns
        'volume-plate_m0', 'ratio-bid_m0',
        'deal-early-second', 'deal-price-avg',
        'd-avg-low-price', 'd-increment-avg-low-price_m0'
    ]],
    how='left',
    left_on='datetime-prev_m2',
    right_on='datetime-curr',
    suffixes=['', '_m3'])
df_history_ts_process.tail()

In [ ]:
# One figure per column: the un-suffixed series overlaid with its
# '_m1' / '_m2' / '_m3' counterparts, from row 1768 onwards.
for plot_column in ('increment-price-mv10sec',
                    'increment-price-prev10sec',
                    'increment-price',
                    'increment-price-target'):
    plt.figure()
    for plot_suffix in ('', '_m1', '_m2', '_m3'):
        plt.plot(df_history_ts_process[plot_column + plot_suffix][1768:])

plt.plot()

In [ ]:

Housekeeping to remove some invalid data during pre-processing


In [ ]:
# Print every column name of the processed frame, one per line.
for i, column_name in enumerate(df_history_ts_process.columns):
    print(column_name)

In [ ]:
# housekeeping: delete some columns
# df_history_ts_process.drop('date-curr_y', axis=1, inplace=True)

In [ ]:
# Display the cut-off value that the next cell compares against 'date-curr'.
parm_record_cut_ccyy

In [ ]:
# Keep only the rows whose 'date-curr' is strictly greater than the cut-off
# 'parm_record_cut_ccyy'; everything at or before the cut-off is dropped.
df_history_ts_process = df_history_ts_process.loc[
    df_history_ts_process['date-curr'] > parm_record_cut_ccyy
]

In [ ]:
# total 61 seconds/rows per month:
# remove first 'parm_record_cut_row_head' records
# remove last 'parm_record_cut_row_tail' records
#
# The bounds are compared as integers.  Comparing the 'second' column against
# str(...) is a lexicographic comparison, which only matches numeric order while
# both sides have the same number of digits (for example '10' >= '5' is False).
# between() is inclusive on both ends, i.e. head <= second <= 60 - tail.
df_history_ts_process = df_history_ts_process[
    df_history_ts_process['second'].astype(int).between(
        int(parm_record_cut_row_head),
        60 - int(parm_record_cut_row_tail))
]

In [ ]:
# Reset index after housekeeping: the row filters in the preceding cells leave
# gaps in the index; drop=True discards the old index instead of keeping it
# as a column.
df_history_ts_process = df_history_ts_process.reset_index(drop=True)

In [ ]:
# Show the first rows of the frame after filtering and re-indexing.
df_history_ts_process.head()

In [ ]:
# Show the last rows of the frame after filtering and re-indexing.
df_history_ts_process.tail()

In [ ]:
# Figure 1: 'increment-price' together with four of its 'mvNsec' columns,
# from row 974 onwards.
plt.figure()
for curve_column in ('increment-price',
                     'increment-price-mv3sec',
                     'increment-price-mv7sec',
                     'increment-price-mv11sec',
                     'increment-price-mv15sec'):
    plt.plot(df_history_ts_process[curve_column][974:])

# Figure 2: 'increment-price-mv15sec' against its '_m1' / '_m2' / '_m3' columns.
plt.figure()
for curve_suffix in ('', '_m1', '_m2', '_m3'):
    plt.plot(df_history_ts_process['increment-price-mv15sec' + curve_suffix][974:])
plt.plot()

In [ ]:

[3] Modeling Part 2: Python scikit-learn

Models to use:

  • GradientBoostingClassifier
  • RandomForestClassifier
  • AdaBoostClassifier
  • ExtraTreesClassifier
  • BaggingClassifier
  • LogisticRegression
  • SVM kernel RBF
  • SVM kernel Linear
  • KNeighborsClassifier

Import pre-processed data


In [ ]:
# plt.plot(df_history_ts_process['d-avg-low-price'])
# plt.figure()
# plt.figure()
# plt.plot(df_history_ts_process['d-avg-low-price_m1'])
# plt.figure()
# plt.plot(df_history_ts_process['d-avg-low-price_m2'])
# plt.figure()
# plt.plot(df_history_ts_process['d-avg-low-price_m3'])

In [ ]:
# Print every column name available for feature selection, one per line.
for i, column_name in enumerate(df_history_ts_process.columns):
    print(column_name)

In [ ]:
# Feature columns handed to the models (155 columns, order matters because
# X_col is later used to label feature importances).
# Not selected: 'ccyy-mm', 'time', 'bid-price', 'date-curr', 'date-prev',
# 'year', 'hour', 'minute', 'datetime-curr', 'datetime-prev', 'date-curr_m0',
# the '_m1'/'_m2'/'_m3' datetime columns, and the un-suffixed
# 'increment-price-target', which is the target.
X = df_history_ts_process[[
    # ---- un-suffixed columns ----
    'month', 'second',
    'base-price15sec', 'increment-price',
    'increment-price-prev1sec', 'increment-price-prev2sec', 'increment-price-prev3sec',
    'increment-price-prev4sec', 'increment-price-prev5sec', 'increment-price-prev6sec',
    'increment-price-prev7sec', 'increment-price-prev8sec', 'increment-price-prev9sec',
    'increment-price-prev10sec', 'increment-price-prev11sec', 'increment-price-prev12sec',
    'increment-price-prev13sec', 'increment-price-prev14sec', 'increment-price-prev15sec',
    'increment-price-mv2sec', 'increment-price-mv3sec', 'increment-price-mv4sec',
    'increment-price-mv5sec', 'increment-price-mv6sec', 'increment-price-mv7sec',
    'increment-price-mv8sec', 'increment-price-mv9sec', 'increment-price-mv10sec',
    'increment-price-mv11sec', 'increment-price-mv12sec', 'increment-price-mv13sec',
    'increment-price-mv14sec', 'increment-price-mv15sec',
    'volume-plate', 'ratio-bid',
    'volume-plate_m0', 'ratio-bid_m0',
    'deal-early-second', 'deal-price-avg',
    'd-avg-low-price', 'd-increment-avg-low-price_m0',

    # ---- '_m1' columns ----
    'base-price15sec_m1', 'increment-price_m1', 'increment-price-target_m1',
    'increment-price-prev1sec_m1', 'increment-price-prev2sec_m1', 'increment-price-prev3sec_m1',
    'increment-price-prev4sec_m1', 'increment-price-prev5sec_m1', 'increment-price-prev6sec_m1',
    'increment-price-prev7sec_m1', 'increment-price-prev8sec_m1', 'increment-price-prev9sec_m1',
    'increment-price-prev10sec_m1', 'increment-price-prev11sec_m1', 'increment-price-prev12sec_m1',
    'increment-price-prev13sec_m1', 'increment-price-prev14sec_m1', 'increment-price-prev15sec_m1',
    'increment-price-mv2sec_m1', 'increment-price-mv3sec_m1', 'increment-price-mv4sec_m1',
    'increment-price-mv5sec_m1', 'increment-price-mv6sec_m1', 'increment-price-mv7sec_m1',
    'increment-price-mv8sec_m1', 'increment-price-mv9sec_m1', 'increment-price-mv10sec_m1',
    'increment-price-mv11sec_m1', 'increment-price-mv12sec_m1', 'increment-price-mv13sec_m1',
    'increment-price-mv14sec_m1', 'increment-price-mv15sec_m1',
    'volume-plate_m0_m1', 'ratio-bid_m0_m1',
    'deal-early-second_m1', 'deal-price-avg_m1',
    'd-avg-low-price_m1', 'd-increment-avg-low-price_m0_m1',

    # ---- '_m2' columns ----
    'base-price15sec_m2', 'increment-price_m2', 'increment-price-target_m2',
    'increment-price-prev1sec_m2', 'increment-price-prev2sec_m2', 'increment-price-prev3sec_m2',
    'increment-price-prev4sec_m2', 'increment-price-prev5sec_m2', 'increment-price-prev6sec_m2',
    'increment-price-prev7sec_m2', 'increment-price-prev8sec_m2', 'increment-price-prev9sec_m2',
    'increment-price-prev10sec_m2', 'increment-price-prev11sec_m2', 'increment-price-prev12sec_m2',
    'increment-price-prev13sec_m2', 'increment-price-prev14sec_m2', 'increment-price-prev15sec_m2',
    'increment-price-mv2sec_m2', 'increment-price-mv3sec_m2', 'increment-price-mv4sec_m2',
    'increment-price-mv5sec_m2', 'increment-price-mv6sec_m2', 'increment-price-mv7sec_m2',
    'increment-price-mv8sec_m2', 'increment-price-mv9sec_m2', 'increment-price-mv10sec_m2',
    'increment-price-mv11sec_m2', 'increment-price-mv12sec_m2', 'increment-price-mv13sec_m2',
    'increment-price-mv14sec_m2', 'increment-price-mv15sec_m2',
    'volume-plate_m0_m2', 'ratio-bid_m0_m2',
    'deal-early-second_m2', 'deal-price-avg_m2',
    'd-avg-low-price_m2', 'd-increment-avg-low-price_m0_m2',

    # ---- '_m3' columns ----
    'base-price15sec_m3', 'increment-price_m3', 'increment-price-target_m3',
    'increment-price-prev1sec_m3', 'increment-price-prev2sec_m3', 'increment-price-prev3sec_m3',
    'increment-price-prev4sec_m3', 'increment-price-prev5sec_m3', 'increment-price-prev6sec_m3',
    'increment-price-prev7sec_m3', 'increment-price-prev8sec_m3', 'increment-price-prev9sec_m3',
    'increment-price-prev10sec_m3', 'increment-price-prev11sec_m3', 'increment-price-prev12sec_m3',
    'increment-price-prev13sec_m3', 'increment-price-prev14sec_m3', 'increment-price-prev15sec_m3',
    'increment-price-mv2sec_m3', 'increment-price-mv3sec_m3', 'increment-price-mv4sec_m3',
    'increment-price-mv5sec_m3', 'increment-price-mv6sec_m3', 'increment-price-mv7sec_m3',
    'increment-price-mv8sec_m3', 'increment-price-mv9sec_m3', 'increment-price-mv10sec_m3',
    'increment-price-mv11sec_m3', 'increment-price-mv12sec_m3', 'increment-price-mv13sec_m3',
    'increment-price-mv14sec_m3', 'increment-price-mv15sec_m3',
    'volume-plate_m0_m3', 'ratio-bid_m0_m3',
    'deal-early-second_m3', 'deal-price-avg_m3',
    'd-avg-low-price_m3', 'd-increment-avg-low-price_m0_m3',
]]

# Remember the feature names before X is turned into a bare array; the
# feature-importance helper further down maps importances back to them.
X_col = X.columns

# DataFrame.as_matrix() is deprecated and absent from current pandas releases;
# .values yields the same NumPy array and exists on old and new versions alike.
# No scaling here: the scalers are fitted per training window inside the
# cross-validation loop.
X = X.values

# Target as a 1-D array with one entry per row of the frame.
y = df_history_ts_process['increment-price-target'].values

In [ ]:
# Display the feature names, in the column order of X.
X_col

In [ ]:
# Quick visual check of the model inputs: first figure plots X (one line per
# column of the 2-D array), second figure plots the target y.
plt.figure()
plt.plot(X)
plt.figure()
plt.plot(y)

[4] Evaluation

K-fold Cross-Validation


In [ ]:
# Random state built from seed 0; it is passed as random_state=rng to the
# ensemble estimators defined in the following cells.
# NOTE(review): the same object is shared by all of them, so each estimator's
# random stream presumably depends on which estimators were fitted before it —
# confirm whether one integer seed per estimator was intended.
rng = check_random_state(0)

In [ ]:
# GB: gradient-boosting regressor (the 'classifier_' prefix is the notebook's
# naming convention for every model).
# Note carried over from an earlier run: score 0.94608 (AUC 0.81419) with
# learning_rate=0.001, max_features=8 was marked "Best".
# loss, subsample, max_depth, min_samples_split and max_features are left at
# the library defaults.
classifier_GB = GradientBoostingRegressor(
    n_estimators=1500,
    learning_rate=0.002,
    random_state=rng,
)

In [ ]:
# AB: AdaBoost regressor.
# Note carried over from an earlier run: score 0.93948 (AUC 0.88339) with
# learning_rate=0.004 was marked "Best".
classifier_AB = AdaBoostRegressor(
    n_estimators=1500,
    learning_rate=0.002,
    random_state=rng,
)

In [ ]:
# RF: random-forest regressor.
# Note carried over from an earlier run: score 0.94207 (AUC 0.81870) with
# max_depth=3, min_samples_split=20 was marked "Best".
# max_features, max_depth and min_samples_split are left at the library defaults.
classifier_RF = RandomForestRegressor(
    n_estimators=1500,
    random_state=rng,
)

In [ ]:
# ET: extra-trees regressor.
# Note carried over from an earlier run: score 0.94655 (AUC 0.84364) with
# max_depth=3, min_samples_split=20, max_features=10 was marked "Best".
# Those three parameters are left at the library defaults here.
classifier_ET = ExtraTreesRegressor(
    n_estimators=1000,
    random_state=rng,
)

In [ ]:
# BG: bagging regressor.
# Note carried over from an earlier run: score 0.70725 (AUC 0.63729), marked "Best".
# max_features is left at the library default.
classifier_BG = BaggingRegressor(
    n_estimators=500,
    random_state=rng,
)

LR


In [ ]:
# LR: linear regression with default settings.
# The trailing score/AUC figures are a note from an earlier run — presumably
# from a classifier version of this notebook; TODO confirm.
classifier_LR = LinearRegression() # score: 0.90199 (AUC 0.80569)

SVM Linear


In [ ]:
# SVM regressor with a linear kernel.
# Previous classifier variant, kept for reference:
# classifier_SVCL = svm.SVC(kernel='linear', probability=True, random_state=rng) # score: 0.89976 (AUC 0.70524)
# The score/AUC note on the next line is identical to the one on the disabled
# SVC line above — presumably copied from it; TODO confirm.
classifier_SVRL = svm.SVR(kernel='linear') # score: 0.89976 (AUC 0.70524)

SVM


In [ ]:
# SVM regressor with an RBF kernel; the score/AUC figures are a note from an
# earlier run.
classifier_SVRR = svm.SVR(kernel='rbf') # score: 0.80188 (AUC 0.50050)
# Alternative kernel, disabled:
# classifier_SVRR = svm.SVR(kernel='poly') # score: 0.80188 (AUC 0.50050)

KNN


In [ ]:
# KNN regressor scored with K-fold cross-validation.
# An integer `cv` makes cross_val_score use plain KFold for a regressor.
# StratifiedKFold stratifies on class labels and is meant for classification
# targets, so it is not appropriate for the numeric 'increment-price-target'.
classifier_KNN = KNeighborsRegressor(n_neighbors=2) # note from an earlier run: score 0.94018 (AUC 0.72792)
cv = cross_val_score(classifier_KNN,
                     X,
                     y,
                     cv=parm_ts_valid_month)
print('KNN CV score: {0:.5f}'.format(cv.mean()))

In [ ]:

Select Model


In [ ]:
# Choose the estimator used by the cross-validation loop below: exactly one
# of these assignments should be active.
# The trailing numbers are presumably the error recorded for that model in an
# earlier run — TODO confirm which metric they refer to.
# classifier = classifier_GB     # 219.099617786
# classifier = classifier_AB     # 230.101439444
classifier = classifier_RF     # 197.955555556
# classifier = classifier_ET     # 
# classifier = classifier_BG     # 
# classifier = classifier_LR     # 
# classifier = classifier_SVRL   # 
# classifier = classifier_SVRR   #

Split Data


In [ ]:
# n_splits: number of consecutive rows that form one test window in the
# cross-validation loop below (taken from parm_ts_valid_cycle).
n_splits = parm_ts_valid_cycle
print('cycle seconds : %d' % n_splits)
# Manual overrides tried earlier (disabled):
# n_splits=54
# n_splits=19
# n_fold: upper bound of the batch loop (taken from parm_ts_valid_month).
# NOTE: the "Cross-Validation" cell that follows overwrites it with 7.
n_fold = parm_ts_valid_month
print('cycle month   : %d' % n_fold)


# Disabled single-window split; the cross-validation loop below performs the
# same slicing for every batch.
# X_train_1 = X[0:(len(X)-batch*n_splits)]
# y_train_1 = y[0:(len(X)-batch*n_splits)]

# X_test_1 = X[(len(X)-batch*n_splits):((len(X)-batch*n_splits)+n_splits)]
# y_test_1 = y[(len(X)-batch*n_splits):((len(X)-batch*n_splits)+n_splits)]

Cross-Validation


In [ ]:
# Overrides the n_fold taken from parm_ts_valid_month in the previous cell.
# The loop below iterates range(1, n_fold), i.e. 6 test windows for n_fold=7.
n_fold=7

In [ ]:
# Evaluation over the most recent windows: for batch = 1 .. n_fold-1 the test
# window is the n_splits rows that start batch*n_splits rows before the end of
# the data, and the training set is every row before that window.
# Results are stored under i = 0, 1, ... (i = 0 is the last window of the data).
# All stored arrays are 1-D with one value per test row, so that
# y_test[i] - y_pred[i] is an element-wise difference.
y_pred = {}
y_test = {}

y_pred_org = {}
y_test_org = {}

i = 0
for batch in range(1, n_fold):
    test_start = len(X) - batch * n_splits
    test_stop = test_start + n_splits
    X_train_1 = X[0:test_start]
    y_train_1 = y[0:test_start]
    X_test_1  = X[test_start:test_stop]
    y_test_1  = y[test_start:test_stop]
    print(len(X_train_1))

    # ReScale: the scalers are fitted on the training rows only and then
    # applied to both the training and the test rows.
    ScalerX = StandardScaler()
    ScalerX.fit(X_train_1)
    X_train_1 = ScalerX.transform(X_train_1)
    X_test_1  = ScalerX.transform(X_test_1)

    # StandardScaler works on 2-D input, so y is reshaped to a single column
    # for the scaler and flattened straight back to 1-D.  Keeping the (n, 1)
    # column would make "y_test - y_pred" broadcast against the 1-D
    # predictions into an n x n matrix and distort every error figure below.
    ScalerY = StandardScaler()
    ScalerY.fit(y_train_1.reshape(-1, 1))
    y_train_1 = ScalerY.transform(y_train_1.reshape(-1, 1)).ravel()
    y_test_1  = ScalerY.transform(y_test_1.reshape(-1, 1)).ravel()

    # ravel() guarantees a 1-D prediction whatever shape the estimator returns.
    y_pred[i] = classifier.fit(X_train_1, y_train_1).predict(X_test_1).ravel()
    y_test[i] = y_test_1

    # Back to the original scale; inverse_transform also needs a 2-D column.
    y_pred_org[i] = ScalerY.inverse_transform(y_pred[i].reshape(-1, 1)).ravel()
    y_test_org[i] = ScalerY.inverse_transform(y_test[i].reshape(-1, 1)).ravel()

    # Figure 1: scaled training target; figure 2: scaled test target vs prediction.
    plt.figure()
    plt.plot(y_train_1)
    plt.plot()
    plt.figure()
    plt.plot(y_test[i])
    plt.plot(y_pred[i])
    plt.plot()
    i += 1

no inverse-scale


In [ ]:
# Per test window: mean of sqrt(square(difference)), i.e. the mean absolute
# difference between y_test and y_pred on the standardized scale.
# NOTE(review): assumes y_test[w] and y_pred[w] have the same shape; an (n, 1)
# array against an (n,) array would broadcast to n x n — confirm.
k = [
    np.mean(np.sqrt(np.square(y_test[window] - y_pred[window])))
    for window in range(len(y_test))
]

k_mean = np.mean(k)

print(k_mean)
print()
print(k)

In [ ]:
# 49~51 second predicts 56~58 second
# Rows 34, 35 and 36 of each test window: the single-second cells further down
# use [34:35], [35:36] and [36:37] for seconds 49, 50 and 51.  A slice end is
# exclusive, so the range has to stop at 37 to include second 51.
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test[i][34:37] - y_pred[i][34:37]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)

inverse-scale


In [ ]:
# Per test window: mean of sqrt(square(difference)), i.e. the mean absolute
# difference between y_test_org and y_pred_org (values after inverse scaling).
# NOTE(review): assumes both arrays of a window have the same shape; an (n, 1)
# array against an (n,) array would broadcast to n x n — confirm.
k = [
    np.mean(np.sqrt(np.square(y_test_org[window] - y_pred_org[window])))
    for window in range(len(y_test))
]

k_mean = np.mean(k)

print(k_mean)
print()
print(k)

In [ ]:
# 49~51 second predicts 56~58 second
# Rows 34, 35 and 36 of each test window: the single-second cells further down
# use [34:35], [35:36] and [36:37] for seconds 49, 50 and 51.  A slice end is
# exclusive, so the range has to stop at 37 to include second 51.
k = []
for i in range(0, len(y_test)):
    k.append(np.mean(np.sqrt(np.square(y_test_org[i][34:37] - y_pred_org[i][34:37]))))

k_mean = np.mean(k)

print(k_mean)
print()
print(k)

In [ ]:
# Row 33 of each test window (inverse-scaled values).
# NOTE(review): the original label read "48 second predicts 56 second", but the
# next cell labels row 34 as "49 second predicts 56 second"; with the same
# offset row 33 is presumably second 48 predicting second 55 — TODO confirm.
k = [
    np.mean(np.sqrt(np.square(y_test_org[window][33:34] - y_pred_org[window][33:34])))
    for window in range(len(y_test))
]

k_mean = np.mean(k)

print(k_mean)
print()
print(k)

In [ ]:
# 49 second predicts 56 second: absolute difference at row 34 of each test
# window (inverse-scaled values), then the mean over all windows.
k = [
    np.mean(np.sqrt(np.square(y_test_org[window][34:35] - y_pred_org[window][34:35])))
    for window in range(len(y_test))
]

k_mean = np.mean(k)

print(k_mean)
print()
print(k)

In [ ]:
# 50 second predicts 57 second: absolute difference at row 35 of each test
# window (inverse-scaled values), then the mean over all windows.
k = [
    np.mean(np.sqrt(np.square(y_test_org[window][35:36] - y_pred_org[window][35:36])))
    for window in range(len(y_test))
]

k_mean = np.mean(k)

print(k_mean)
print()
print(k)

In [ ]:
# 51 second predicts 58 second: absolute difference at row 36 of each test
# window (inverse-scaled values), then the mean over all windows.
k = [
    np.mean(np.sqrt(np.square(y_test_org[window][36:37] - y_pred_org[window][36:37])))
    for window in range(len(y_test))
]

k_mean = np.mean(k)

print(k_mean)
print()
print(k)

In [ ]:
# 52 second predicts 59 second: absolute difference at row 37 of each test
# window (inverse-scaled values), then the mean over all windows.
k = [
    np.mean(np.sqrt(np.square(y_test_org[window][37:38] - y_pred_org[window][37:38])))
    for window in range(len(y_test))
]

k_mean = np.mean(k)

print(k_mean)
print()
print(k)

In [ ]:
# 53 second predicts 60 second: absolute difference at row 38 of each test
# window (inverse-scaled values), then the mean over all windows.
k = [
    np.mean(np.sqrt(np.square(y_test_org[window][38:39] - y_pred_org[window][38:39])))
    for window in range(len(y_test))
]

k_mean = np.mean(k)

print(k_mean)
print()
print(k)

In [ ]:
# Actual vs predicted target (inverse-scaled) for window 0, which the
# cross-validation loop fills from the last n_splits rows of the data.
plt.plot(y_test_org[0])
plt.plot(y_pred_org[0])

In [ ]:
# Plot the per-window error list k left by whichever metric cell ran last.
plt.plot(k)

In [ ]:


In [ ]:
# Figure 1 (rows 819 onwards): 'increment-price', 'd-increment-avg-low-price_m0'
# and the difference between the two.
# plt.plot(df_history_ts_process['increment-price-target'][819:])
plt.plot(df_history_ts_process['increment-price'][819:])
plt.plot(df_history_ts_process['d-increment-avg-low-price_m0'][819:])
plt.plot(df_history_ts_process['increment-price'][819:] - df_history_ts_process['d-increment-avg-low-price_m0'][819:])
# Figure 2: 'd-increment-avg-low-price_m0' against its '_m1' / '_m2' / '_m3' columns.
plt.figure()
plt.plot(df_history_ts_process['d-increment-avg-low-price_m0'][819:])
plt.plot(df_history_ts_process['d-increment-avg-low-price_m0_m1'][819:])
plt.plot(df_history_ts_process['d-increment-avg-low-price_m0_m2'][819:])
plt.plot(df_history_ts_process['d-increment-avg-low-price_m0_m3'][819:])

Model Feature Importances:


In [ ]:
def util_feature_importances(classifier, feature_names=None):
    """Return (feature name, importance) pairs, most important first.

    Parameters
    ----------
    classifier : object
        Fitted estimator exposing ``feature_importances_``; it is printed
        before the result is built.
    feature_names : sequence of str, optional
        One name per feature, in the column order the estimator was fitted
        on.  Defaults to the notebook-level ``X_col``.

    Returns
    -------
    list of (str, float)
        Pairs sorted by importance in descending order; an empty list when
        there are no feature names.
    """
    print(classifier)
    if feature_names is None:
        feature_names = X_col
    importances = classifier.feature_importances_
    dict_importance = {name: importances[position]
                       for position, name in enumerate(feature_names)}
    # Sort once, after the mapping is complete (the sort used to be repeated on
    # every loop iteration and was never assigned for an empty name list).
    return sorted(dict_importance.items(), key=lambda pair: pair[1], reverse=True)

In [ ]:
# Feature importances of the gradient-boosting model.
# NOTE(review): in the cells shown here only the estimator assigned to
# `classifier` is fitted (inside the cross-validation loop) — confirm
# classifier_GB has been fitted before running this cell.
util_feature_importances(classifier_GB)

In [ ]:
# Feature importances of the random-forest model.  classifier_RF is the
# estimator selected as `classifier` in the "Select Model" cell, so the
# cross-validation loop has fitted it; the values reflect its last fit.
util_feature_importances(classifier_RF)

In [ ]:
# Feature importances of the AdaBoost model.
# NOTE(review): in the cells shown here only the estimator assigned to
# `classifier` is fitted (inside the cross-validation loop) — confirm
# classifier_AB has been fitted before running this cell.
util_feature_importances(classifier_AB)

In [ ]:
# Feature importances of the extra-trees model.
# NOTE(review): in the cells shown here only the estimator assigned to
# `classifier` is fitted (inside the cross-validation loop) — confirm
# classifier_ET has been fitted before running this cell.
util_feature_importances(classifier_ET)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


The End