In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import *
import xgboost as xgb
import operator
import settings
import utils
import get_data
from ta import *
API: http://bitcoincharts.com/charts
period = ['1-min', '5-min', '15-min', '30-min', 'Hourly', '2-hour', '6-hour', '12-hour', 'Daily', 'Weekly']
market = ['krakenEUR', 'bitstampUSD'] — see the full list of markets at https://bitcoincharts.com/charts/volumepie/
In [2]:
# get_data.get('data/datas.csv', period=settings.PERIOD, market=settings.MARKET)
In [3]:
# Load the raw OHLCV dataset produced by the get_data step above.
df = pd.read_csv('data/datas.csv', sep=',')
In [4]:
# Append a placeholder row for the next (future) period so the model can
# emit a prediction for it. Only the Hourly period is handled; the whole
# append is guarded by the `if` — the original created the row
# unconditionally and raised NameError for any other settings.PERIOD.
last_timestamp = df['Timestamp'].iloc[-1]
if settings.PERIOD == 'Hourly':
    next_timestamp = last_timestamp + 3600  # one hour, in seconds
    df_next = pd.DataFrame([next_timestamp], columns=['Timestamp'])
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported replacement and produces the same result here.
    df = pd.concat([df, df_next], ignore_index=True)
    # Fill the placeholder row's remaining columns with a dummy value so
    # later feature code does not drop it as NaN.
    df.iloc[-1] = df.iloc[-1].fillna(1)
In [5]:
# Report the current frame dimensions.
rows, columns = df.shape
print('Number of rows: {}, Number of columns: {}'.format(rows, columns))
In [6]:
# Clean the frame via the project helper; presumably drops NaN/invalid
# rows — confirm the exact rules in utils.dropna.
df = utils.dropna(df)
In [7]:
# Report the dimensions after cleaning.
rows, columns = df.shape
print('Number of rows: {}, Number of columns: {}'.format(rows, columns))
Create a `Target` column whose value is one of the classes [UP, KEEP, DOWN]
In [8]:
# Label each row: 0 = KEEP (default), 1 = UP, 2 = DOWN, depending on
# whether Close moved beyond the configured percentage band around Open.
# The arithmetic is kept in the exact original form so borderline
# floating-point comparisons are unchanged.
df['Target'] = 0  # 'KEEP'
up_threshold = df.Open + (df.Open * settings.PERCENT_UP)
down_threshold = df.Open - (df.Open * settings.PERCENT_DOWN)
df.loc[up_threshold < df.Close, 'Target'] = 1    # 'UP'
df.loc[down_threshold > df.Close, 'Target'] = 2  # 'DOWN'
In [9]:
# Summarize the frame size and the class balance of the new Target column.
rows, columns = df.shape
print('Number of rows: {}, Number of columns: {}'.format(rows, columns))
n_up = len(df[df.Target == 1])
n_down = len(df[df.Target == 2])
print('Number of UP rows: {}, Number of DOWN rows: {}'.format(n_up, n_down))
Derive Date, Year, Month, Week, Weekday, Day and Hour columns from the Timestamp column
In [10]:
# Expand the Unix Timestamp into calendar feature columns.
df['Date'] = df['Timestamp'].apply(utils.timestamptodate)
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
# Series.dt.weekofyear was removed in pandas 2.0; isocalendar().week is
# the documented replacement (same ISO week number).
df['Week'] = df['Date'].dt.isocalendar().week
df['Weekday'] = df['Date'].dt.weekday
df['Day'] = df['Date'].dt.day
df['Hour'] = df['Date'].dt.hour
# extra dates
# df["yearmonth"] = df["Date"].dt.year*100 + df["Date"].dt.month
# df["yearweek"] = df["Date"].dt.year*100 + df["Date"].dt.isocalendar().week
# df["yearweekday"] = df["Date"].dt.year*10 + df["Date"].dt.weekday
In [11]:
# Shift the price/volume columns one period back so each row only carries
# information available BEFORE the period being predicted (no lookahead).
cols = ['Open', 'High', 'Low', 'Close', 'Volume_BTC', 'Volume_Currency', 'Weighted_Price']
df[cols] = df[cols].shift(1)
# The first row has no previous period after the shift; drop it.
df = df.dropna()
In [12]:
df['High-low'] = df['High'] - df['Low']
df['Close-open'] = df['Close'] - df['Open']


def _up_or_down(frame, up_factor, down_factor):
    """Label rows 0 (KEEP), 1 (UP) or 2 (DOWN) by a percentage band around Open.

    UP when Close exceeds Open + Open*up_factor; DOWN when Close falls below
    Open - Open*down_factor; otherwise KEEP. The arithmetic mirrors the
    original inline expressions exactly so float comparisons are unchanged.
    """
    labels = pd.Series(0, index=frame.index)
    labels[frame.Open + (frame.Open * up_factor) < frame.Close] = 1   # 'UP'
    labels[frame.Open - (frame.Open * down_factor) > frame.Close] = 2  # 'DOWN'
    return labels


# Same labeling at four sensitivities of the configured thresholds.
df['Up_or_Down'] = _up_or_down(df, settings.PERCENT_UP, settings.PERCENT_DOWN)
df['Up_or_Down_2'] = _up_or_down(df, settings.PERCENT_UP * 2, settings.PERCENT_DOWN * 2)
# factor 0 reduces to the plain Open < Close / Open > Close comparison
df['Up_or_Down_3'] = _up_or_down(df, 0, 0)
df['Up_or_Down_4'] = _up_or_down(df, settings.PERCENT_UP / 2, settings.PERCENT_DOWN / 2)
In [13]:
# Fundamental analysis: simple (per-period) and cumulative returns of Close.
prev_close = df['Close'].shift(1)
df['Daily_return'] = (df['Close'] / prev_close) - 1
df['Daily_return_100'] = ((df['Close'] / prev_close) - 1) * 100
# The first row has no previous close, so drop it before the cumulative part.
df = df.dropna()
first_close = df['Close'].iloc[0]
df['Cumulative_return'] = (df['Close'] / first_close) - 1
df['Cumulative_return_100'] = ((df['Close'] / first_close) - 1) * 100
# TODO: cumulative return per week, month, year...
In [14]:
# Report the dimensions after adding the return features.
rows, columns = df.shape
print('Number of rows: {}, Number of columns: {}'.format(rows, columns))
In [15]:
# Volume-based technical indicators from the `ta` helpers, computed once
# against BTC volume and once against fiat-currency volume.
# Accumulation/Distribution index
df['Acc_Dist_Roc_BTC'] = acc_dist_roc(df, 'Volume_BTC', 2)
df['Acc_Dist_Roc_Currency'] = acc_dist_roc(df, 'Volume_Currency', 2)
df['Acc_Dist_BTC'] = acc_dist_index(df, 'Volume_BTC')
df['Acc_Dist_Currency'] = acc_dist_index(df, 'Volume_Currency')
# Chaikin Money Flow — three variants; 20 is the window for variants 2 and 3
df['Chaikin_Money_Flow_1_BTC'] = chaikin_money_flow1(df, 'Volume_BTC')
df['Chaikin_Money_Flow_2_BTC'] = chaikin_money_flow2(df, 'Volume_BTC', 20)
df['Chaikin_Money_Flow_3_BTC'] = chaikin_money_flow3(df, 'Volume_BTC', 20)
df['Chaikin_Money_Flow_1_Currency'] = chaikin_money_flow1(df, 'Volume_Currency')
df['Chaikin_Money_Flow_2_Currency'] = chaikin_money_flow2(df, 'Volume_Currency', 20)
df['Chaikin_Money_Flow_3_Currency'] = chaikin_money_flow3(df, 'Volume_Currency', 20)
# Money Flow Index over a 14-period window
df['Money_Flow_BTC'] = money_flow_index(df, 'Volume_BTC', 14)
df['Money_Flow_Currency'] = money_flow_index(df, 'Volume_Currency', 14)
# On-balance volume
df['OBV_BTC'] = on_balance_volume(df, 'Volume_BTC')
df['OBV_BTC_mean'] = on_balance_volume_mean(df, 'Volume_BTC')
df['OBV_Currency'] = on_balance_volume(df, 'Volume_Currency')
df['OBV_Currency_mean'] = on_balance_volume_mean(df, 'Volume_Currency')
# Force Index with a 2-period parameter
df['Force_Index_BTC'] = force(df, 'Volume_BTC', 2)
df['Force_Index_Currency'] = force(df, 'Volume_Currency', 2)
# delete intermediate columns
# NOTE(review): this implies the on_balance_volume* helpers add a temporary
# 'OBV' column to df as a side effect — confirm against the ta helpers.
df.drop('OBV', axis=1, inplace=True)
In [16]:
# Trend indicators from the `ta` helpers; each call returns several columns.
# Moving Average Convergence Divergence — parameters 12, 26, 9
# (presumably fast/slow/signal periods; confirm in the ta helpers).
df[['MACD', 'MACD_sign', 'MACD_diff']] = macd(df, 12, 26, 9)
# Average directional movement index over a 14-period window
df[['ADX', 'ADX_pos', 'ADX_neg']] = adx(df, 14)
# Vortex indicator over a 14-period window
df[['Vortex_pos', 'Vortex_neg']] = vortex(df, 14)
In [17]:
# Relative Strength Index over a 14-period window (ta helper).
df['RSI'] = rsi(df, 14)
In [18]:
"""
for c in df.columns:
print str(c) + u' - ' + str(df[c].isnull().sum())
"""
Out[18]:
In [19]:
# Momentum: fractional change of Close versus `window` periods ago.
for window in range(2, 11):
    df['Momentum_' + str(window)] = (df['Close'] / df['Close'].shift(window)) - 1
# Rolling statistics of Close over the same window sizes.
for window in range(2, 11):
    close_by_date = df.set_index('Date')['Close']
    df['Rolling_mean_' + str(window)] = close_by_date.rolling(window=window).mean().values
    df['Rolling_std_' + str(window)] = close_by_date.rolling(window=window).std().values
    df['Rolling_cov_' + str(window)] = close_by_date.rolling(window=window).cov().values
# Bollinger bands: rolling mean plus/minus two rolling standard deviations.
for window in range(2, 11):
    mean_col = 'Rolling_mean_' + str(window)
    std_col = 'Rolling_std_' + str(window)
    df['Bollinger_band_mean_' + str(window) + '_max'] = df[mean_col] + (2 * df[std_col])
    df['Bollinger_band_mean_' + str(window) + '_min'] = df[mean_col] - (2 * df[std_col])
In [20]:
# Drop rows left incomplete by the shifted/rolling features and report
# the frame size before and after.
rows, columns = df.shape
print('Number of rows: {}, Number of columns: {}'.format(rows, columns))
df = df.dropna()
rows, columns = df.shape
print('Number of rows: {}, Number of columns: {}'.format(rows, columns))
In [21]:
# Project helper; presumably a chronological train/test split — confirm
# the split ratio and ordering in utils.split_df.
train, test = utils.split_df(df)
In [22]:
# Feature matrix = every column except the label and the raw time columns.
excl = ['Target', 'Date', 'Timestamp']
cols = [column for column in df.columns if column not in excl]
In [ ]:
y_train = train['Target']

# Multiclass softmax over the 3 Target classes (0=KEEP, 1=UP, 2=DOWN).
xgb_params = {
    'n_trees': 800,
    'eta': 0.0045,
    'max_depth': 20,
    'subsample': 0.95,
    'colsample_bytree': 0.95,
    'colsample_bylevel': 0.95,
    'objective': 'multi:softmax',
    'num_class': 3,
    'eval_metric': 'mlogloss',  # alternatives: 'merror', 'rmse'
    'base_score': 0,
    'silent': 1
}
dtrain = xgb.DMatrix(train[cols], y_train)
dtest = xgb.DMatrix(test[cols])

# xgboost cross-validation to pick the number of boosting rounds.
# (The original ran a second, throwaway xgb.cv first and computed an
# unused y_mean; both removed.)
cv_result = xgb.cv(xgb_params,
                   dtrain,
                   num_boost_round=5000,
                   early_stopping_rounds=50,
                   verbose_eval=50,
                   show_stdv=False
                   )
num_boost_rounds = len(cv_result)
# num_boost_rounds = 1000
print(num_boost_rounds)

# train
model = xgb.train(xgb_params, dtrain, num_boost_round=num_boost_rounds)

# predict
y_pred = model.predict(dtest)
y_true = test['Target']

# BUG FIX: the original read y_true (the actual label) here, so the
# "Prediction:" line never reflected the model's output. Report the
# prediction for the last test row — the synthetic next-period row
# appended earlier (assumes utils.split_df keeps chronological order;
# TODO confirm).
prediction_value = y_pred[-1]
if prediction_value == 1.0:
    print("Prediction: UP")
elif prediction_value == 2.0:
    print("Prediction: DOWN")
else:  # 0.0
    print("Prediction: KEEP")

# Feature importances, ascending (Python 3 print calls; the original
# used Python 2 print statements, a syntax error under Python 3).
print("\n \n \n \n \n \n ********** WEIGHT ************")
importance = sorted(model.get_fscore().items(), key=operator.itemgetter(1))
for i in importance:
    print(i)

print("\n \n \n \n \n \n ********** GAIN ************")
importance = sorted(model.get_score(fmap='', importance_type='gain').items(),
                    key=operator.itemgetter(1))
for i in importance:
    print(i)
In [ ]: