Import the main libraries.


In [2]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import urllib.request  # for HTTP requests (web scraping, APIs)
from datetime import datetime
import os

In [23]:
from sklearn.ensemble import GradientBoostingRegressor

Currency pair data are available at http://www.forextester.com/data/datasources .

You can adjust the data with alpha (prediction-interval level) and nrows (number of rows to fetch).


In [24]:
# Configuration: tune these to control how much data is used.
nrows = 10000  # number of CSV rows to fetch
alpha = 0.90  # prediction-interval level
concurancy_pair_path = "./EURUSD.csv"  # local copy of the currency-pair data (minute bars)

In [25]:
# Load the minute bars. The date/time columns are kept as strings so
# leading zeros survive for datetime parsing later.
# Fixes: `np.str` was removed in NumPy 1.24 — the plain builtin `str`
# is the correct dtype; and the row count now honors the `nrows`
# configuration constant instead of a hardcoded 1000.
pair = pd.read_csv(concurancy_pair_path,
                   nrows=nrows,
                   dtype={'<TIME>': str, '<DTYYYYMMDD>': str})

In [26]:
# Strip the angle brackets from the exported column names
# ('<TICKER>' -> 'TICKER', '<OPEN>' -> 'OPEN', ...). The mapper form
# replaces the verbose one-to-one dict, and the explicit reassignment
# replaces inplace=True so the cell is idempotent and chain-friendly.
pair = pair.rename(columns=lambda c: c.strip('<>'))

In [27]:
def convert_date(row):
    """Combine a row's DTYYYYMMDD and TIME string fields into a datetime.

    Expects DTYYYYMMDD as 'YYYYMMDD' and TIME as 'HHMMSS' (zero-padded
    strings, as forced by the read_csv dtype). A plain ``def`` replaces
    the lambda-bound-to-a-name form, per PEP 8.
    """
    return datetime.strptime(str(row['DTYYYYMMDD']) + str(row['TIME']),
                             '%Y%m%d%H%M%S')

In [28]:
# Build a single datetime column from the separate date and time strings.
pair['DATETIME'] = pair.apply(convert_date, axis=1)

In [29]:
# Sanity check: first 10 rows with the parsed DATETIME column.
pair.head(10)


Out[29]:
TICKER DTYYYYMMDD TIME OPEN HIGH LOW CLOSE VOL DATETIME
0 EURUSD 20010102 230100 0.9507 0.9507 0.9507 0.9507 4 2001-01-02 23:01:00
1 EURUSD 20010102 230200 0.9506 0.9506 0.9505 0.9505 4 2001-01-02 23:02:00
2 EURUSD 20010102 230300 0.9505 0.9507 0.9505 0.9506 4 2001-01-02 23:03:00
3 EURUSD 20010102 230400 0.9506 0.9506 0.9506 0.9506 4 2001-01-02 23:04:00
4 EURUSD 20010102 230500 0.9506 0.9506 0.9506 0.9506 4 2001-01-02 23:05:00
5 EURUSD 20010102 230600 0.9506 0.9506 0.9506 0.9506 4 2001-01-02 23:06:00
6 EURUSD 20010102 230700 0.9505 0.9507 0.9505 0.9507 4 2001-01-02 23:07:00
7 EURUSD 20010102 230800 0.9507 0.9507 0.9507 0.9507 4 2001-01-02 23:08:00
8 EURUSD 20010102 230900 0.9507 0.9507 0.9507 0.9507 4 2001-01-02 23:09:00
9 EURUSD 20010102 231000 0.9507 0.9507 0.9507 0.9507 4 2001-01-02 23:10:00

In [30]:
# Take the first trading day's (date, frame) pair. next(iter(...))
# stops after the first group, whereas list(...)[0] materialized every
# group just to index the first one.
one_day = next(iter(pair.groupby('DTYYYYMMDD')))

In [31]:
# Last opening price of the day as a scalar. .iloc[-1] extracts the
# value directly; float(Series) is deprecated in recent pandas.
last_price = float(one_day[1]['OPEN'].iloc[-1])

In [32]:
last_date = one_day[1].tail(1).DATETIME # last date — NOTE(review): this is a one-element Series, not a scalar; use .iloc[-1] if a scalar is intended

In [33]:
# NOTE(review): silently overrides the alpha = 0.90 set in the
# configuration cell near the top; plot labels below should use this value.
alpha = 0.95

In [34]:
# NOTE(review): this estimator is never fitted or used — an identical
# one is re-created in the fitting cell below, shadowing this definition.
clf = GradientBoostingRegressor(loss='quantile', alpha=alpha,
                                n_estimators=250, max_depth=3,
                                learning_rate=.1, min_samples_leaf=9,
                                min_samples_split=9)

In [35]:
# Use the datetime values as the single feature column (shape (n, 1)).
X = np.atleast_2d(pair.DATETIME).T
# Fix: float32 has a 24-bit mantissa, so nanosecond-scale timestamps
# (~1.5e18) collapse — adjacent minutes map to the same float32 value.
# float64 preserves minute-level resolution.
X = X.astype(np.float64)
y = pair.CLOSE

In [36]:
# Predict on the training inputs themselves (no separate evaluation grid).
xx = X

In [37]:
# Fit three models on the full history: upper quantile, lower quantile,
# and the mean prediction.
clf = GradientBoostingRegressor(loss='quantile', alpha=alpha,
                                n_estimators=250, max_depth=3,
                                learning_rate=.1, min_samples_leaf=9,
                                min_samples_split=9)
clf.fit(X, y)

# Upper bound of the prediction interval (alpha quantile).
y_upper = clf.predict(xx)

# Reuse the estimator for the symmetric lower quantile.
clf.set_params(alpha=1.0 - alpha)
clf.fit(X, y)
y_lower = clf.predict(xx)

# Point prediction with squared-error loss.
# Fix: loss='ls' was deprecated in scikit-learn 1.0 and removed in 1.2;
# 'squared_error' is the current name for the same loss.
clf.set_params(loss='squared_error')
clf.fit(X, y)
y_pred = clf.predict(xx)

In [38]:
# Plot the function, the prediction and the 90% confidence interval based on
# the MSE
fig = plt.figure()
plt.plot(xx, y, 'g:', label=u'$f(x) = x\,\sin(x)$')
plt.plot(X, y, 'b.', markersize=10, label=u'Observations')
plt.plot(xx, y_pred, 'r-', label=u'Prediction')
plt.plot(xx, y_upper, 'k-')
plt.plot(xx, y_lower, 'k-')
plt.fill(np.concatenate([xx, xx[::-1]]),
         np.concatenate([y_upper, y_lower[::-1]]),
         alpha=.5, fc='b', ec='None', label='90% prediction interval')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.ylim(min(y) * 0.998, max(y) * 1.003)
plt.legend(loc=2, borderaxespad=0., bbox_to_anchor=(1.05, 1))
plt.show()



In [39]:
# Negative offset that holds out the last (1 - alpha) fraction of rows
# for back-testing. NOTE(review): this reuses the quantile level `alpha`
# as a train/test split ratio — presumably intentional, but confirm.
back_offset = int(-1 *X.shape[0] * (1 - alpha))

In [40]:
# Back-test: fit on everything except the held-out tail (back_offset is
# negative), then predict over the whole range.
clf = GradientBoostingRegressor(loss='quantile', alpha=alpha,
                                n_estimators=250, max_depth=3,
                                learning_rate=.1, min_samples_leaf=9,
                                min_samples_split=9)
clf.fit(X[:back_offset], y[:back_offset])

# Upper bound of the prediction interval (alpha quantile).
y_upper = clf.predict(xx)

# Reuse the estimator for the symmetric lower quantile.
clf.set_params(alpha=1.0 - alpha)
clf.fit(X[:back_offset], y[:back_offset])
y_lower = clf.predict(xx)

# Point prediction with squared-error loss.
# Fix: loss='ls' was deprecated in scikit-learn 1.0 and removed in 1.2;
# 'squared_error' is the current name for the same loss.
clf.set_params(loss='squared_error')
clf.fit(X[:back_offset], y[:back_offset])
y_pred = clf.predict(xx)

In [41]:
# Plot the function, the prediction and the 90% confidence interval based on
# the MSE
fig = plt.figure()
plt.plot(xx, y, 'g:', label=u'$f(x) = x\,\sin(x)$')
plt.plot(X, y, 'b.', markersize=10, label=u'Observations')
plt.plot(xx, y_pred, 'r-', label=u'Prediction')
plt.plot(xx, y_upper, 'k-')
plt.plot(xx, y_lower, 'k-')
plt.fill(np.concatenate([xx, xx[::-1]]),
         np.concatenate([y_upper, y_lower[::-1]]),
         alpha=.5, fc='b', ec='None', label='90% prediction interval')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.ylim(min(y) * 0.998, max(y) * 1.003)
plt.legend(loc=2, borderaxespad=0., bbox_to_anchor=(1.05, 1))
plt.show()


Zoom in on the last part of the series (the held-out region).


In [44]:
# Plot the function, the prediction and the 90% confidence interval based on
# the MSE
fig = plt.figure()
plt.plot(xx, y, 'g:', label=u'$f(x) = x\,\sin(x)$')
plt.plot(X, y, 'b.', markersize=10, label=u'Observations')
plt.plot(xx, y_pred, 'r-', label=u'Prediction')
plt.plot(xx, y_upper, 'k-')
plt.plot(xx, y_lower, 'k-')
plt.fill(np.concatenate([xx, xx[::-1]]),
         np.concatenate([y_upper, y_lower[::-1]]),
         alpha=.5, fc='b', ec='None', label='90% prediction interval')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.ylim(min(y) * 0.998, max(y) * 1.003)
plt.xlim((min(X)+max(X))/2, max(X))
plt.legend(loc=2, borderaxespad=0., bbox_to_anchor=(1.05, 1))
plt.show()



In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: