Import the most important libs
In [2]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import urllib.request # for HTTP requests (web scraping, APIs)
from datetime import datetime
import os
In [23]:
from sklearn.ensemble import GradientBoostingRegressor
Currency pair data are available at http://www.forextester.com/data/datasources .
You can tune the analysis with `alpha` (the quantile level that defines the prediction interval) and `nrows` (the number of rows to fetch).
In [24]:
# Intended number of rows to fetch from the CSV file.
nrows = 10000
# Quantile level for the prediction interval.
# NOTE(review): alpha is reassigned to 0.95 in a later cell before any model is built.
alpha = 0.90
# Location of the EUR/USD currency-pair CSV, relative to the notebook.
concurancy_pair_path = "./EURUSD.csv"
In [25]:
# Load the first `nrows` rows of the currency-pair CSV.
# The row limit comes from the `nrows` setting instead of a hard-coded 1000,
# so changing the setting actually changes how much data is read.
# The date and time columns are read as text (builtin `str`; the `np.str`
# alias no longer exists in current NumPy) so they can be concatenated and
# parsed as-is.
pair = pd.read_csv(
    concurancy_pair_path,
    nrows=nrows,
    dtype={'<TIME>': str, '<DTYYYYMMDD>': str},
)
In [26]:
# Strip the angle brackets from the raw CSV headers, e.g. '<OPEN>' -> 'OPEN'.
pair.rename(
    columns={'<{}>'.format(column): column
             for column in ('TICKER', 'DTYYYYMMDD', 'TIME', 'OPEN',
                            'HIGH', 'LOW', 'CLOSE', 'VOL')},
    inplace=True,
)
In [27]:
def convert_date(row):
    """Combine a row's DTYYYYMMDD and TIME fields into one datetime.

    Both fields are converted to text, joined, and parsed with the
    '%Y%m%d%H%M%S' layout. `row` only needs to support item access by
    those two keys.
    """
    stamp = "".join((str(row['DTYYYYMMDD']), str(row['TIME'])))
    return datetime.strptime(stamp, '%Y%m%d%H%M%S')
In [28]:
# Build the timestamp column with one vectorized parse instead of calling
# strptime once per row. The date and time columns are joined as text and
# parsed with the same '%Y%m%d%H%M%S' layout.
pair['DATETIME'] = pd.to_datetime(
    pair['DTYYYYMMDD'].astype(str) + pair['TIME'].astype(str),
    format='%Y%m%d%H%M%S',
)
In [29]:
# Preview the first ten rows, including the derived DATETIME column.
pair.head(n=10)
Out[29]:
In [30]:
# Take only the first (date, frame) pair from the per-day grouping.
# Pulling one item from the iterator avoids building a list of every group
# just to index element 0.
one_day = next(iter(pair.groupby('DTYYYYMMDD')))  # fetch first day
In [31]:
# Last opening price of the first day. The scalar is taken positionally with
# .iloc[-1] because calling float() on a one-element Series is deprecated.
last_price = float(one_day[1]['OPEN'].iloc[-1])
In [32]:
# Last timestamp of the first day, kept as a one-row Series.
last_date = one_day[1]['DATETIME'].tail(1)
In [33]:
# Quantile level for the upper band; the lower band is fitted at 1 - alpha,
# so 0.95 corresponds to the 90% interval named in the plot legends.
# This replaces the alpha = 0.90 assigned in the configuration cell.
alpha = 0.95
In [34]:
# Gradient-boosting regressor set up for quantile loss at level `alpha`.
# NOTE(review): an estimator with the same settings is constructed again
# right before fitting, so this instance looks redundant - confirm.
clf = GradientBoostingRegressor(
    loss='quantile',
    alpha=alpha,
    n_estimators=250,
    learning_rate=.1,
    max_depth=3,
    min_samples_split=9,
    min_samples_leaf=9,
)
In [35]:
# Single feature: seconds elapsed since the earliest bar, as an (n, 1) column.
# Casting the raw timestamps straight to float32 would produce epoch-scale
# numbers (around 1e18 when stored as nanoseconds), which float32 can only
# represent in steps of a minute or more, so neighbouring bars would collapse
# onto the same feature value. Small elapsed-second offsets stay exact.
elapsed_seconds = (pair.DATETIME - pair.DATETIME.min()).dt.total_seconds()
X = np.atleast_2d(elapsed_seconds).T
X = X.astype(np.float32)
# Target: closing price of each bar.
y = pair.CLOSE
In [36]:
# Predictions are evaluated on the very same inputs used for fitting;
# xx is just another name for X, not a separate evaluation grid.
xx = X
In [37]:
# Fit three models on the full series and predict on xx:
#   y_upper - quantile regression at level alpha
#   y_lower - quantile regression at level 1 - alpha
#   y_pred  - squared-error regression (central prediction)
clf = GradientBoostingRegressor(loss='quantile', alpha=alpha,
                                n_estimators=250, max_depth=3,
                                learning_rate=.1, min_samples_leaf=9,
                                min_samples_split=9)

# Upper bound of the interval.
clf.fit(X, y)
y_upper = clf.predict(xx)

# Lower bound: same estimator, mirrored quantile level.
clf.set_params(alpha=1.0 - alpha)
clf.fit(X, y)
y_lower = clf.predict(xx)

# Central prediction. 'squared_error' is the current name of the
# least-squares loss; the former alias 'ls' has been removed from scikit-learn.
clf.set_params(loss='squared_error')
clf.fit(X, y)
y_pred = clf.predict(xx)
In [38]:
# Plot the observed close prices, the central prediction and the band between
# the lower- and upper-quantile predictions.
# The interval width follows from alpha (upper level alpha, lower 1 - alpha).
interval_label = '{:.0f}% prediction interval'.format(abs(2 * alpha - 1) * 100)
fig = plt.figure()
plt.plot(xx, y, 'g:', label='Close price')
plt.plot(X, y, 'b.', markersize=10, label='Observations')
plt.plot(xx, y_pred, 'r-', label='Prediction')
plt.plot(xx, y_upper, 'k-')
plt.plot(xx, y_lower, 'k-')
# Closed polygon: upper bound left-to-right, then lower bound right-to-left.
plt.fill(np.concatenate([xx, xx[::-1]]),
         np.concatenate([y_upper, y_lower[::-1]]),
         alpha=.5, fc='b', ec='None', label=interval_label)
plt.xlabel('Time')
plt.ylabel('Close price')
# Small vertical margin around the observed price range.
plt.ylim(min(y) * 0.998, max(y) * 1.003)
plt.legend(loc=2, borderaxespad=0., bbox_to_anchor=(1.05, 1))
plt.show()
In [39]:
# Negative slice offset that holds out the last (1 - alpha) share of the rows.
# At least one row is always held out: an offset of 0 would make X[:0] an
# empty training set.
holdout_rows = max(1, int(X.shape[0] * (1 - alpha)))
back_offset = -holdout_rows
In [40]:
# Refit the three models on everything except the trailing rows cut off by
# back_offset, then predict over the whole range xx (including those rows):
#   y_upper - quantile regression at level alpha
#   y_lower - quantile regression at level 1 - alpha
#   y_pred  - squared-error regression (central prediction)
clf = GradientBoostingRegressor(loss='quantile', alpha=alpha,
                                n_estimators=250, max_depth=3,
                                learning_rate=.1, min_samples_leaf=9,
                                min_samples_split=9)

# Upper bound of the interval.
clf.fit(X[:back_offset], y[:back_offset])
y_upper = clf.predict(xx)

# Lower bound: same estimator, mirrored quantile level.
clf.set_params(alpha=1.0 - alpha)
clf.fit(X[:back_offset], y[:back_offset])
y_lower = clf.predict(xx)

# Central prediction. 'squared_error' is the current name of the
# least-squares loss; the former alias 'ls' has been removed from scikit-learn.
clf.set_params(loss='squared_error')
clf.fit(X[:back_offset], y[:back_offset])
y_pred = clf.predict(xx)
In [41]:
# Plot the observed close prices against the current predictions and the band
# between the lower- and upper-quantile predictions.
# The interval width follows from alpha (upper level alpha, lower 1 - alpha).
interval_label = '{:.0f}% prediction interval'.format(abs(2 * alpha - 1) * 100)
fig = plt.figure()
plt.plot(xx, y, 'g:', label='Close price')
plt.plot(X, y, 'b.', markersize=10, label='Observations')
plt.plot(xx, y_pred, 'r-', label='Prediction')
plt.plot(xx, y_upper, 'k-')
plt.plot(xx, y_lower, 'k-')
# Closed polygon: upper bound left-to-right, then lower bound right-to-left.
plt.fill(np.concatenate([xx, xx[::-1]]),
         np.concatenate([y_upper, y_lower[::-1]]),
         alpha=.5, fc='b', ec='None', label=interval_label)
plt.xlabel('Time')
plt.ylabel('Close price')
# Small vertical margin around the observed price range.
plt.ylim(min(y) * 0.998, max(y) * 1.003)
plt.legend(loc=2, borderaxespad=0., bbox_to_anchor=(1.05, 1))
plt.show()
Zoom in on the last half of the time range.
In [44]:
# Same figure as the full-range plot, with the x-axis restricted to the
# second half of the range (from the midpoint of X to its maximum).
# The interval width follows from alpha (upper level alpha, lower 1 - alpha).
interval_label = '{:.0f}% prediction interval'.format(abs(2 * alpha - 1) * 100)
fig = plt.figure()
plt.plot(xx, y, 'g:', label='Close price')
plt.plot(X, y, 'b.', markersize=10, label='Observations')
plt.plot(xx, y_pred, 'r-', label='Prediction')
plt.plot(xx, y_upper, 'k-')
plt.plot(xx, y_lower, 'k-')
# Closed polygon: upper bound left-to-right, then lower bound right-to-left.
plt.fill(np.concatenate([xx, xx[::-1]]),
         np.concatenate([y_upper, y_lower[::-1]]),
         alpha=.5, fc='b', ec='None', label=interval_label)
plt.xlabel('Time')
plt.ylabel('Close price')
# Small vertical margin around the observed price range.
plt.ylim(min(y) * 0.998, max(y) * 1.003)
# Array reductions give plain scalars; the builtin min()/max() on the 2-D X
# would return one-element arrays.
x_low, x_high = float(X.min()), float(X.max())
plt.xlim((x_low + x_high) / 2, x_high)
plt.legend(loc=2, borderaxespad=0., bbox_to_anchor=(1.05, 1))
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: