training module: shl_tm
prediction module: shl_pm
simulation module: shl_sm
misc module: shl_mm
historical bidding price, per second, time series
live bidding price, per second, time series
parm_si (seasonality index per second)
parm_month (per-month parameters such as alpha, beta, gamma, etc.)
In [27]:
    
%matplotlib inline
import matplotlib.pyplot as plt
    
In [1]:
    
import pandas as pd
    
In [2]:
    
# Load the historical bidding time series (one row per second, per the notebook header)
# and preview its last rows. The 'ccyy-mm', 'time' and 'bid-price' columns are read
# later by the simulation loop.
df_history_ts_process = pd.read_csv('data/history_ts.csv') 
df_history_ts_process.tail()
    
    Out[2]:
In [3]:
    
# Load data/history_table.csv and preview its last rows.
# NOTE(review): this frame is not referenced anywhere else in the visible notebook.
df_history_table_process = pd.read_csv('data/history_table.csv') 
df_history_table_process.tail()
    
    Out[3]:
In [6]:
    
# Load the seasonality-index table and preview its last rows.
# fetech_si() looks rows up by the 'ccyy-mm' and 'time' columns and returns 'si'.
df_parm_si = pd.read_csv('data/parm_si.csv') 
# print(df_parm_si[(df_parm_si['ccyy-mm'] == '2017-07') & (df_parm_si['time'] == '11:29:00')].iloc[0]['si'])
df_parm_si.tail()
    
    Out[6]:
In [7]:
    
# Load the per-month parameter table and preview its last rows.
# Columns read later in this notebook: 'ccyy-mm', 'di', 'alpha', 'beta', 'gamma',
# 'sec57-weight', 'month-weight' and 'short-weight'.
df_parm_month = pd.read_csv('data/parm_month.csv') 
# print(df_parm_month[(df_parm_month['ccyy-mm'] == '2017-07') & (df_parm_month['time'] == '11:29:00')].iloc[0]['di'])
df_parm_month.tail()
    
    Out[7]:
In [8]:
    
# function to fetch Seasonality-Index
def fetech_si(ccyy_mm, time, df_parm_si):
    """Return the seasonality index stored for one month and one second.

    Parameters
    ----------
    ccyy_mm : value compared against the 'ccyy-mm' column (e.g. '2017-07').
    time : value compared against the 'time' column (e.g. '11:29:00').
    df_parm_si : DataFrame with 'ccyy-mm', 'time' and 'si' columns.

    Returns
    -------
    The 'si' value of the first matching row.

    Raises
    ------
    IndexError
        If no row matches the requested month and time.
    """
    matches = df_parm_si[(df_parm_si['ccyy-mm'] == ccyy_mm) & (df_parm_si['time'] == time)]
    if matches.empty:
        # Same exception type the bare .iloc[0] would raise, but naming the missing key.
        raise IndexError(
            'no seasonality index found for ccyy-mm=%r, time=%r' % (ccyy_mm, time))
    return matches.iloc[0]['si']
    
In [9]:
    
# function to fetch Dynamic-Increment
def fetech_di(ccyy_mm, df_parm_month):
    """Return the dynamic increment ('di') stored for one month.

    Parameters
    ----------
    ccyy_mm : value compared against the 'ccyy-mm' column (e.g. '2017-07').
    df_parm_month : DataFrame with 'ccyy-mm' and 'di' columns.

    Returns
    -------
    The 'di' value of the first matching row.

    Raises
    ------
    IndexError
        If no row matches the requested month.
    """
    matches = df_parm_month[df_parm_month['ccyy-mm'] == ccyy_mm]
    if matches.empty:
        # Same exception type the bare .iloc[0] would raise, but naming the missing key.
        raise IndexError('no monthly parameters found for ccyy-mm=%r' % (ccyy_mm,))
    return matches.iloc[0]['di']
    
In [62]:
    
def get_previous_n_sec_time_as_str(df_time_field, n):
    """Return the clock time ``n`` seconds before ``df_time_field``.

    ``df_time_field`` is parsed with the '%H:%M:%S' format; the result is the
    ``str()`` of the shifted time-of-day (e.g. '11:29:54').
    """
    parsed_stamp = pd.to_datetime(df_time_field, format='%H:%M:%S')
    step_back = pd.Timedelta(seconds=n)
    earlier_stamp = parsed_stamp - step_back
    return str(earlier_stamp.time())
# print(get_previous_n_sec_time_as_str('11:29:57',3))
def get_future_n_sec_time_as_str(df_time_field, n):
    """Return the clock time ``n`` seconds after ``df_time_field``.

    ``df_time_field`` is parsed with the '%H:%M:%S' format; the result is the
    ``str()`` of the shifted time-of-day (e.g. '11:30:00').
    """
    parsed_stamp = pd.to_datetime(df_time_field, format='%H:%M:%S')
    step_forward = pd.Timedelta(seconds=n)
    later_stamp = parsed_stamp + step_forward
    return str(later_stamp.time())
# print(get_future_n_sec_time_as_str('11:29:57',3))
    
    
In [ ]:
    
    
In [42]:
    
# which month to predict?
global_parm_ccyy_mm = '2017-07'
# placeholder base price; the simulation loop overwrites it with
# (11:29:00 bid price - 1) when it processes the 11:29:00 record
global_parm_base_price = 10000000
# dynamic increment for the selected month
global_parm_dynamic_increment = fetech_di(global_parm_ccyy_mm, df_parm_month)
# look the selected month's parameter row up once and read every parameter from it
global_parm_month_row = df_parm_month[df_parm_month['ccyy-mm'] == global_parm_ccyy_mm].iloc[0]
global_parm_alpha = global_parm_month_row['alpha']
global_parm_beta = global_parm_month_row['beta']
global_parm_gamma = global_parm_month_row['gamma']
global_parm_sec57_weight = global_parm_month_row['sec57-weight']
global_parm_month_weight = global_parm_month_row['month-weight']
global_parm_short_weight = global_parm_month_row['short-weight']
print('=================================================')
print('  Global Parameters for Month : %s' % global_parm_ccyy_mm)
print('-------------------------------------------------')
print('global_parm_dynamic_increment : %d' % global_parm_dynamic_increment)
print('global_parm_alpha             : %0.15f' % global_parm_alpha) # used in forecasting
print('global_parm_beta              : %0.15f' % global_parm_beta)  # used in forecasting
print('global_parm_gamma             : %0.15f' % global_parm_gamma) # used in forecasting
print('global_parm_sec57_weight      : %f' % global_parm_sec57_weight) # used in training a model
print('global_parm_month_weight      : %f' % global_parm_month_weight) # used in training a model
print('global_parm_short_weight      : %f' % global_parm_short_weight) # used in training a model
print('=================================================')
# plot the seasonality index of the selected month (follows global_parm_ccyy_mm
# instead of a hard-coded month, so the chart always matches the printed parameters)
plt.figure(figsize=(6,3))
plt.plot(df_parm_si[df_parm_si['ccyy-mm'] == global_parm_ccyy_mm]['si'])
    
    
    Out[42]:
    
In [ ]:
    
    
In [154]:
    
# 11:29:00~11:29:50
# Replay 51 consecutive history records and, for each one, produce a one-step-ahead
# forecast using level/trend exponential smoothing (alpha/beta) on the de-seasonalised
# price. Results are accumulated in df_shl_pm, which is re-created at 11:29:00.
# NOTE(review): the original comment said "use July 2015 data as simulation" while
# global_parm_ccyy_mm is '2017-07' - confirm which month rows 1830..1880 belong to.
for i in range(1830, 1830+51):
    # read the record's fields once instead of re-indexing the frame for every use
    current_ccyy_mm = df_history_ts_process['ccyy-mm'][i] # format: ccyy-mm
    current_time = df_history_ts_process['time'][i] # format: hh:mm:ss
    current_bid = df_history_ts_process['bid-price'][i] # format: integer
    is_first_record = current_time == '11:29:00'

    print('\n<<<< Record No.: %5d >>>>' % i)
    print(current_ccyy_mm)
    print(current_time)
    print(current_bid)

    # capture & calculate 11:29:00 bid price - 1 = base price
    if is_first_record:
        global_parm_base_price = current_bid - 1
        print('#### global_parm_base_price : %d ####' % global_parm_base_price)

    print('---- Pre-Process ---')
    # pre-process: ccyy-mm hh:mm:ss
    current_datetime = current_ccyy_mm + ' ' + current_time
    current_price4pm = current_bid - global_parm_base_price
    print('#### current_datetime   : %s ####' % current_datetime)
    print('#### current_price4pm   : %d ####' % current_price4pm)

    # get Seasonality-Index
    current_si = fetech_si(current_ccyy_mm, current_time, df_parm_si)
    print('#### current_si         : %0.10f ####' % current_si)
    # get de-seasoned price: price4pmsi
    current_price4pmsi = current_price4pm / current_si
    print('#### current_price4pmsi : %0.10f ####' % current_price4pmsi)

    print('---- call predicitno function shl_pm ---- %s' % current_time)
    if is_first_record:
        # initialize prediction dataframe at 11:29:00 and seed the smoother:
        # level = first de-seasoned price, trend = 0
        df_shl_pm = pd.DataFrame()
        current_pred_les_level = current_price4pmsi
        current_pred_les_trend = 0
    else:
        # forecast next k=1 step from the previous second's level and trend
        previous_time = get_previous_n_sec_time_as_str(current_time, 1)
        # single lookup of the previous second's row (raises IndexError if it is missing)
        previous_row = df_shl_pm[(df_shl_pm['ccyy-mm'] == current_ccyy_mm)
                                 & (df_shl_pm['time'] == previous_time)].iloc[0]
        previous_pred_les_level = previous_row['pred_les_level']
        print('     previous_pred_les_level : %f' % previous_pred_les_level)

        previous_pred_les_trend = previous_row['pred_les_trend']
        print('     previous_pred_les_trend : %f' % previous_pred_les_trend)

        current_pred_les_level = global_parm_alpha * current_price4pmsi \
                                    + (1 - global_parm_alpha) * (previous_pred_les_level + previous_pred_les_trend)
        print('     current_pred_les_level  : %f' % current_pred_les_level)
        current_pred_les_trend = global_parm_beta * (current_pred_les_level - previous_pred_les_level) \
                                    + (1 - global_parm_beta) * previous_pred_les_trend
        print('     current_pred_les_trend  : %f' % current_pred_les_trend)

    # shared by both branches: re-seasonalise, add the base price back, round to 100
    current_pred_les = current_pred_les_level + current_pred_les_trend
    current_pred_les_misc = 0
    current_pred_price = (current_pred_les + current_pred_les_misc) * current_si \
                            + global_parm_base_price
    current_pred_price_rounded = round(current_pred_price/100, 0) * 100
    current_pred_dynamic_increment = global_parm_dynamic_increment
    current_pred_set_price_rounded = current_pred_price_rounded + current_pred_dynamic_increment

    # write results to shl_pm dataframe
    df_shl_pm_current = {
                         'ccyy-mm' : current_ccyy_mm
                        ,'time' : current_time
                        ,'bid' : current_bid
                        ,'datetime' : current_datetime
                        ,'price4pm' : current_price4pm
                        ,'si' : current_si
                        ,'price4pmsi' :  current_price4pmsi
                        ,'pred_les_level' : current_pred_les_level
                        ,'pred_les_trend' : current_pred_les_trend
                        ,'current_pred_les' : current_pred_les
                        ,'pred_les_misc' : current_pred_les_misc
                        ,'pred_price' : current_pred_price
                        ,'pred_price_rounded' : current_pred_price_rounded
                        ,'pred_dynamic_increment' : current_pred_dynamic_increment # +200 or + 300
                        ,'pred_set_price_rounded' : current_pred_set_price_rounded
                        }
    # DataFrame.append was removed in pandas 2.0, so build a one-row frame and concat.
    # The empty frame is replaced rather than concatenated so it cannot influence dtypes.
    df_shl_pm_current_row = pd.DataFrame([df_shl_pm_current])
    if df_shl_pm.empty:
        df_shl_pm = df_shl_pm_current_row
    else:
        df_shl_pm = pd.concat([df_shl_pm, df_shl_pm_current_row], ignore_index=True)
    
    
In [155]:
    
# Preview the first rows of the prediction table built by the simulation loop.
df_shl_pm.head()
    
    Out[155]:
In [157]:
    
# Preview the last rows of the prediction table built by the simulation loop.
df_shl_pm.tail()
    
    Out[157]:
In [124]:
    
# Plot the recorded bids against the forecasts from the simulation loop.
plt.figure(figsize=(12,6))
plt.plot(df_shl_pm['bid'])
# plt.plot(df_shl_pm['pred_price'])
# shift(1) moves each forecast one row down, so the value computed at row t is
# drawn against the bid at row t+1 (the loop forecasts the next k=1 step).
plt.plot(df_shl_pm['pred_price'].shift(1))
# plt.plot(df_shl_pm['pred_price'].shift(-1))
    
    Out[124]:
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [166]:
    
# 11:29:51~
def predict_k_step_price(df_shl_pm, ccyy_mm, time, k):
    """Extend the prediction table by ``k`` forecast seconds starting at ``time``.

    No observed bid is used: for every step the previous second's ``pred_price``
    is fed back in as if it were the observed bid, then the same level/trend
    smoothing update as in the simulation loop is applied.

    Reads the module-level ``global_parm_base_price``, ``global_parm_alpha``,
    ``global_parm_beta``, ``global_parm_dynamic_increment`` and ``df_parm_si``.

    Parameters
    ----------
    df_shl_pm : DataFrame of earlier predictions; must already contain the row
        for the second before ``time`` in month ``ccyy_mm``.
    ccyy_mm : month to forecast, matched against the 'ccyy-mm' column.
    time : first second to forecast, as an 'hh:mm:ss' string.
    k : number of consecutive seconds to forecast.

    Returns
    -------
    A new DataFrame: the input rows followed by ``k`` forecast rows. The frame
    passed in is not modified.

    Raises
    ------
    IndexError
        If the row for a required previous second is missing from the table.
    """
    print('month & time  : ', ccyy_mm, time)
    print()
    for sec in range(0, k):
        print('delta second(s) : ', sec)
        current_time  = get_future_n_sec_time_as_str(time, sec)
        print('current_time  : %s' % current_time)
        previous_time = get_previous_n_sec_time_as_str(current_time, 1)
        print('previous_time : %s' % previous_time)

        # One lookup of the previous second's row; it filters on the ccyy_mm
        # argument (the original ignored it and used global_parm_ccyy_mm).
        previous_row = df_shl_pm[(df_shl_pm['ccyy-mm'] == ccyy_mm)
                                 & (df_shl_pm['time'] == previous_time)].iloc[0]
        previous_pred_les_level = previous_row['pred_les_level']
        print('     previous_pred_les_level : %f' % previous_pred_les_level)

        previous_pred_les_trend = previous_row['pred_les_trend']
        print('     previous_pred_les_trend : %f' % previous_pred_les_trend)
        print('---- Pre-Process ---')
        # use the previous predicted value as the pseudo-observation (bootstrap)
        previous_pred_price = previous_row['pred_price']
        # pre-process: ccyy-mm hh:mm:ss
        current_datetime = ccyy_mm + ' ' + current_time
        current_price4pm = previous_pred_price -  global_parm_base_price
        print('#### current_datetime   : %s ####' %  current_datetime)
        print('#### previous_pred_price: %s ####' %  previous_pred_price)
        print('#### current_price4pm   : %d ####' % current_price4pm)
        # get Seasonality-Index
        current_si = fetech_si(ccyy_mm, current_time, df_parm_si)
        print('#### current_si         : %0.10f ####' %  current_si)
        # get de-seasoned price: price4pmsi
        current_price4pmsi = current_price4pm / current_si
        print('#### current_price4pmsi : %0.10f ####' % current_price4pmsi)
        current_pred_les_level = global_parm_alpha * current_price4pmsi \
                                    + (1 - global_parm_alpha) * (previous_pred_les_level + previous_pred_les_trend)
        print('     current_pred_les_level  : %f' % current_pred_les_level)
        current_pred_les_trend = global_parm_beta * (current_pred_les_level - previous_pred_les_level) \
                                    + (1 - global_parm_beta) * previous_pred_les_trend
        print('     current_pred_les_trend  : %f' % current_pred_les_trend)
        # re-seasonalise, add the base price back, round to the nearest 100
        current_pred_les = current_pred_les_level + current_pred_les_trend
        current_pred_les_misc = 0
        current_pred_price = (current_pred_les + current_pred_les_misc) * current_si \
                                + global_parm_base_price
        current_pred_price_rounded = round(current_pred_price/100, 0) * 100
        current_pred_dynamic_increment = global_parm_dynamic_increment
        current_pred_set_price_rounded = current_pred_price_rounded + current_pred_dynamic_increment
        # write results to shl_pm dataframe; 'bid' holds the pseudo-observation
        df_shl_pm_current = {
                             'ccyy-mm' : ccyy_mm
                            ,'time' : current_time
                            ,'bid' : previous_pred_price
                            ,'datetime' : current_datetime
                            ,'price4pm' : current_price4pm
                            ,'si' : current_si
                            ,'price4pmsi' :  current_price4pmsi
                            ,'pred_les_level' : current_pred_les_level
                            ,'pred_les_trend' : current_pred_les_trend
                            ,'current_pred_les' : current_pred_les
                            ,'pred_les_misc' : current_pred_les_misc
                            ,'pred_price' : current_pred_price
                            ,'pred_price_rounded' : current_pred_price_rounded
                            ,'pred_dynamic_increment' : current_pred_dynamic_increment # +200 or + 300
                            ,'pred_set_price_rounded' : current_pred_set_price_rounded
                            }
        print('---------------------------')
        # DataFrame.append was removed in pandas 2.0; concat returns a new frame,
        # so the caller's table is left untouched exactly as before.
        df_shl_pm = pd.concat([df_shl_pm, pd.DataFrame([df_shl_pm_current])],
                              ignore_index=True)

    return df_shl_pm
    
In [178]:
    
# Forecast 10 consecutive seconds starting at 11:29:51 (i.e. up to 11:30:00),
# continuing from the prediction table built by the simulation loop.
df_shl_pm_new = predict_k_step_price(df_shl_pm, global_parm_ccyy_mm, '11:29:51', 10)
    
    
In [181]:
    
# Show the last 11 rows: the 10 forecast rows plus the row just before them.
df_shl_pm_new.tail(11)
    
    Out[181]:
In [182]:
    
# Show the original table again; predict_k_step_price returned a new frame
# (df_shl_pm_new) instead of modifying this one.
df_shl_pm.tail()
    
    Out[182]:
In [186]:
    
# Plot the extended table. For the rows added by predict_k_step_price the 'bid'
# column holds the previous prediction, not an observed bid.
plt.figure(figsize=(12,6))
plt.plot(df_shl_pm_new['bid'])
# shift(1) draws the forecast computed at row t against the 'bid' at row t+1
plt.plot(df_shl_pm_new['pred_price'].shift(1))
# plt.plot(df_shl_pm_k['pred_price'].shift(-1))
    
    Out[186]:
    
In [187]:
    
# Plot the original (history-only) prediction table again for comparison.
plt.figure(figsize=(12,6))
plt.plot(df_shl_pm['bid'])
# plt.plot(df_shl_pm['pred_price'])
# shift(1) draws the forecast computed at row t against the bid at row t+1
plt.plot(df_shl_pm['pred_price'].shift(1))
# plt.plot(df_shl_pm['pred_price'].shift(-1))
    
    Out[187]:
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
# from __future__ import print_function, division
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import pandas as pd
import operator
from scipy import interp
from itertools import cycle
from sklearn import svm
from sklearn.utils.validation import check_random_state
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_curve, auc
from statsmodels.graphics.mosaicplot import mosaic
print(__doc__)
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]: