In [39]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.linear_model import SGDClassifier
from sklearn import linear_model
from sklearn import svm

In [40]:
linear = svm.SVC()
df = pd.read_csv("./app/Python/FINALOBAMA.csv", header=0)

In [80]:
xtrain, xtest, ytrain, ytest = train_test_split(df['Close'], df['Approval'], train_size = 0.8)

In [81]:
linear.fit(pd.DataFrame(xtrain), pd.DataFrame(ytrain))


/Users/jasminefeldmann1/anaconda/lib/python2.7/site-packages/sklearn/svm/base.py:472: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y_ = column_or_1d(y, warn=True)
Out[81]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [82]:
output = linear.predict(pd.DataFrame(xtest))

In [83]:
ytest = ytest.reset_index()["Approval"]

In [88]:
for i in range (0, len(output)):
    difference = output[i] - ytest[i]
    print difference, output[i], ytest[i]


2 46 44
-1 46 47
3 46 43
1 46 45
0 40 40
0 46 46
-3 40 43
0 46 46
-3 46 49
-12 46 58
3 46 43
-3 46 49
-2 46 48
3 63 60
2 46 44
-20 46 66
-1 46 47
6 46 40
3 46 43
1 46 45
1 46 45
-5 46 51
-6 46 52
-4 46 50
3 46 43
-3 46 49
1 45 44
-1 46 47
5 50 45
-1 45 46
0 46 46
-2 46 48
0 46 46
6 46 40
0 46 46
4 46 42
-1 46 47
4 46 42
-4 43 47
0 46 46
-5 46 51
0 46 46
-6 46 52
-6 46 52
1 47 46
-3 43 46
-4 46 50
2 43 41
1 43 42
-1 46 47
-6 46 52
-1 46 47
-1 45 46
5 46 41
-4 46 50
-11 43 54
-2 41 43
0 46 46
-2 46 48
5 46 41
-3 45 48
4 46 42
-18 46 64
-3 45 48
0 46 46
3 46 43
-1 46 47
-1 46 47
-6 46 52

In [89]:
linear.score(pd.DataFrame(xtrain), pd.DataFrame(ytrain))


Out[89]:
0.86594202898550721

In [ ]: