In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csvtodb as DB
%matplotlib inline

Adjusted Close


In [3]:
symbol = "SBIN"
Datframe = DB.obtainQuotes(symbol)
Datframe['Adj_Close'] = Datframe.Close/Datframe.Close[0]
#print Datframe
fig, ax = plt.subplots()
ax.set_xticklabels(Datframe.index[::len(Datframe)/7],rotation=70)
ax.autoscale()
plt.plot(Datframe['Adj_Close'])
plt.show()


Percentage change per day


In [4]:
Datframe['Per_Change'] = 0.0
for i in range(1,len(Datframe)):
    Datframe['Per_Change'][i] = Datframe['Close'][i]/Datframe['Close'][i-1]
fig, ax = plt.subplots()
ax.set_xticklabels(Datframe.index[::len(Datframe)/7],rotation=70)
ax.autoscale()
plt.plot(Datframe['Per_Change'])
plt.show()


Bollinger Bands


In [17]:
bollinger_period = 20
Datframe['Bollinger'] = 0.0
means = pd.rolling_mean(Datframe['Close'],bollinger_period,min_periods=bollinger_period)
rolling_std = pd.rolling_std(Datframe['Close'],bollinger_period,min_periods=bollinger_period)
Datframe['Bollinger'] = (Datframe['Close'] - means)/rolling_std
Datframe['Bollinger2'] = Datframe['Bollinger'] * Datframe['Bollinger']
fig, ax = plt.subplots()
ax.set_xticklabels(Datframe.index[::len(Datframe)/7],rotation=70)
ax.autoscale()
plt.plot(Datframe['Bollinger'])
plt.show()


Moving Average


In [18]:
moving_average_period = 14
Datframe['14MA'] = 0.0
means = pd.rolling_mean(Datframe['Close'],bollinger_period,min_periods=bollinger_period)
Datframe['14MA'] = (Datframe['Close'] - means)/ means
Datframe['14MA2'] = Datframe['14MA'] * Datframe['14MA']
fig, ax = plt.subplots()
ax.set_xticklabels(Datframe.index[::len(Datframe)/7],rotation=70)
ax.autoscale()
plt.plot(Datframe['14MA'])
plt.show()


Target1


In [6]:
days = 5
Datframe['Target1'] = 0.0
for i in range(0,len(Datframe)-days):
    Datframe['Target1'][i] = (Datframe['Close'][i+5]/Datframe['Close'][i]) > 1.03

DataProcessing


In [20]:
inputDataFormat = "Per_Change 0,Per_Change 1,Bollinger 2,Bollinger2 2,Bollinger 1,Bollinger2 1,Bollinger 0,Bollinger2 0,14MA 0,14MA2 0" + \
                  ",14MA 1,14MA2 1,14MA 2,14MA2 2"
targetDataFormat = "Target1"
skipLastDays = 5
items = inputDataFormat.split(',')
reference= []
param = []
for item in items:
    param.append(item.split(' ')[0])
    reference.append(int(item.split(' ')[1]))
    
inputData = []
target = []
for i in range(max(reference)+1,len(Datframe)-skipLastDays):
    line = []
    for vari in range(0,len(param)):
        line.append(Datframe[param[vari]][i-reference[vari]])
    target.append(Datframe[targetDataFormat][i])
    inputData.append(line)
print len(target),len(inputData)
inputData = np.array(inputData)
inputData[np.isnan(inputData)] = 0
target = np.array(target)


2780 2780

SVM Learning


In [21]:
from sklearn import svm
rbf_svc = svm.SVC(kernel='poly', degree = 4, C = 1.0).fit(inputData[0:2000],target[0:2000])
y = rbf_svc.predict(inputData)
print np.sum(abs(y-target))
pseudo_y = np.array([0] * len(y))
print np.sum(abs(pseudo_y-target))
print sum(y != pseudo_y)


791.0
812.0
29

In [24]:
# Plotting input data
%matplotlib inline
x1 = inputData[:,10]
x2 = inputData[:,8]
y = target
plt.scatter(x1,x2,c=y)
plt.show()



In [ ]: