In [457]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.linear_model import SGDClassifier
from sklearn import linear_model
from sklearn import svm

In [458]:
linear = svm.SVC()
df = pd.read_csv("FINALOBAMA_minus_first_six.csv", header=0)

In [459]:
xtrain, xtest, ytrain, ytest = train_test_split(df['Close'], df['Approval'], train_size = 0.8)

In [460]:
linear.fit(pd.DataFrame(xtrain), pd.DataFrame(ytrain))


/Users/apprentice/anaconda/lib/python2.7/site-packages/sklearn/svm/base.py:472: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y_ = column_or_1d(y, warn=True)
Out[460]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [461]:
output = linear.predict(pd.DataFrame(xtest))

In [462]:
ytest = ytest.reset_index()["Approval"]

In [463]:
xtest


Out[463]:
125    1553.280029
29     2055.469971
196    1158.670044
95     1761.640015
184    1361.229980
152    1440.670044
169    1278.040039
260    1104.510010
45     1886.760010
241    1293.239990
98     1703.199951
143    1416.180054
7      2076.620117
103    1655.170044
93     1798.180054
173    1369.099976
199    1253.229980
220    1270.979980
148    1411.939941
57     1978.339966
84     1838.699951
165    1362.160034
108    1709.670044
225    1340.199951
166    1335.020020
81     1797.020020
217    1339.670044
254    1176.189941
200    1285.089966
134    1513.170044
39     2067.560059
27     2110.300049
207    1154.229980
170    1317.819946
100    1691.750000
112    1631.890015
259    1109.550049
182    1369.630005
124    1588.849976
41     2039.819946
48     1982.849976
40     2063.500000
179    1397.109985
255    1165.150024
251    1225.849976
65     1923.569946
244    1256.770020
3      2077.570068
88     1818.319946
270    1076.760010
121    1614.420044
229    1328.170044
157    1411.130005
46     1906.130005
47     1967.900024
72     1815.689941
18     2117.689941
281    1194.369995
268    1077.959961
Name: Close, dtype: float64

In [464]:
for i in range (0, len(output)):
    difference = output[i] - ytest[i]
    print difference, output[i], ytest[i]


-2 46 48
0 46 46
3 46 43
5 46 41
1 46 45
-2 46 48
0 46 46
5 50 45
5 46 41
-3 46 49
4 46 42
-5 46 51
0 46 46
2 46 44
5 46 41
-2 46 48
3 46 43
1 47 46
-4 46 50
2 43 41
0 40 40
-1 45 46
-1 44 45
-5 46 51
0 46 46
5 46 41
0 46 46
1 46 45
3 46 43
-7 46 53
4 47 43
-1 45 46
3 46 43
-2 46 48
1 46 45
0 46 46
3 49 46
2 47 45
-3 46 49
4 46 42
4 46 42
4 46 42
1 47 46
0 46 46
1 46 45
2 46 44
1 48 47
0 46 46
6 46 40
1 46 45
-4 46 50
1 46 45
0 46 46
5 46 41
-1 42 43
3 46 43
1 46 45
-1 46 47
0 46 46

In [ ]:


In [465]:
linear.score(pd.DataFrame(xtrain), pd.DataFrame(ytrain))


Out[465]:
0.86440677966101698

In [466]:
predictive_data_high = [1950, 2050, 2150, 2250, 2350, 2450, 2550, 2650, 2750, 2850]

In [467]:
predictive_data_same = [1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950]

In [468]:
predictive_data_low = [1950, 1850, 1750, 1650, 1550, 1450, 1350, 1250, 1150, 1050]

In [469]:
output_high = linear.predict(pd.DataFrame(predictive_data_high))

In [470]:
output_high


Out[470]:
array([44, 46, 46, 46, 46, 46, 46, 46, 46, 46])

In [471]:
output_medium = linear.predict(pd.DataFrame(predictive_data_same))

In [472]:
output_medium


Out[472]:
array([44, 44, 44, 44, 44, 44, 44, 44, 44, 44])

In [473]:
output_low = linear.predict(pd.DataFrame(predictive_data_low))

In [474]:
output_low


Out[474]:
array([44, 46, 46, 49, 46, 46, 46, 46, 48, 46])

In [ ]:


In [475]:
regression = linear_model.LinearRegression()

In [476]:
regression.fit(pd.DataFrame(xtrain), pd.DataFrame(ytrain))


Out[476]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [477]:
print('Coefficients: \n', regression.coef_)


('Coefficients: \n', array([[-0.00219561]]))

In [478]:
print("Residual sum of squares: %.2f"
      % np.mean((regression.predict(pd.DataFrame(xtest)) - pd.DataFrame(ytest)) ** 2))


Residual sum of squares: 7.66

In [479]:
plt.plot(pd.DataFrame(xtest), pd.DataFrame(ytest), color="black")


Out[479]:
[<matplotlib.lines.Line2D at 0x10ef8e690>]

In [480]:
plt.xticks(())
plt.yticks(())


Out[480]:
([], <a list of 0 Text yticklabel objects>)

In [481]:
plt.show()

In [482]:
plt.scatter(pd.DataFrame(xtest), pd.DataFrame(ytest), color="black")


Out[482]:
<matplotlib.collections.PathCollection at 0x10f753510>

In [483]:
plt.show()

In [ ]:


In [ ]: