In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.linear_model import SGDClassifier
from sklearn import linear_model
from sklearn import svm

In [2]:
linear = svm.SVC()
df = pd.read_csv("obama_data.csv", header=0)

In [4]:
xtrain, xtest, ytrain, ytest = train_test_split(df['Unemployment Rate'], df['Obama Approval Rating'], train_size = 0.8)

In [5]:
linear.fit(pd.DataFrame(xtrain), pd.DataFrame(ytrain))


/Users/apprentice/anaconda/lib/python2.7/site-packages/sklearn/svm/base.py:472: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y_ = column_or_1d(y, warn=True)
Out[5]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [6]:
output = linear.predict(pd.DataFrame(xtest))

In [7]:
ytest = ytest.reset_index()["Obama Approval Rating"]

In [8]:
for i in range (0, len(output)):
    difference = output[i] - ytest[i]
    print difference, output[i], ytest[i]


3 46 43
-6 46 52
0 46 46
0 42 42
1 46 45
-3 46 49
-11 46 57
-2 42 44
-3 46 49
-4 46 50
-7 46 53
-15 46 61
1 46 45
0 46 46
1 46 45
-7 46 53

In [9]:
linear.score(pd.DataFrame(xtrain), pd.DataFrame(ytrain))


Out[9]:
0.25396825396825395

In [ ]: