In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.linear_model import SGDClassifier
from sklearn import linear_model
from sklearn import svm
In [2]:
# Load the dataset; the first row of the CSV supplies the column names.
df = pd.read_csv("obama_data.csv", header=0)

# Support-vector classifier built with scikit-learn's default settings.
# NOTE(review): the variable is called `linear`, but no kernel is passed here;
# scikit-learn documents the default SVC kernel as RBF - confirm whether
# kernel='linear' was intended.
# NOTE(review): SVC is a classifier, so every distinct target value is treated
# as a class label - confirm a regressor (svm.SVR) was not intended.
linear = svm.SVC()
In [4]:
# Split into 80% training rows and 20% held-out rows.
# A fixed seed makes the split (and every score computed from it) reproducible
# across kernel restarts; without it each run shuffles the rows differently.
# NOTE(review): this notebook imports train_test_split from
# sklearn.cross_validation, which was removed in scikit-learn 0.20 in favour of
# sklearn.model_selection - update the import cell when upgrading.
SEED = 42
xtrain, xtest, ytrain, ytest = train_test_split(
    df['Unemployment Rate'],
    df['Obama Approval Rating'],
    train_size=0.8,
    random_state=SEED
)
In [5]:
# Fit the model on the training split.
# The single feature is wrapped in a one-column DataFrame because the estimator
# expects a 2-D feature matrix; the target is passed as-is (1-D) because a
# column-vector y makes scikit-learn emit a DataConversionWarning.
linear.fit(pd.DataFrame(xtrain), ytrain)
Out[5]:
In [6]:
# Predict the target for every held-out row; the feature Series is wrapped in
# a one-column DataFrame before being handed to the fitted model.
output = linear.predict(
    pd.DataFrame(data=xtest)
)
In [7]:
# Replace the shuffled row labels left by the split with a fresh 0..n-1 index
# so that ytest[i] lines up positionally with output[i]. The old labels are
# discarded; the Series keeps its values and its name.
ytest = ytest.reset_index(drop=True)
In [8]:
# Print one line per held-out sample: prediction error, prediction, actual.
# Pairing values with zip() is purely positional, so the loop stays correct
# even if ytest still carries the shuffled row labels from the split (a
# label-based ytest[i] lookup would then fetch the wrong row or raise KeyError).
# print(...) with a single pre-formatted string behaves the same on Python 2
# and Python 3, unlike the Python 2-only print statement.
for predicted, actual in zip(output, ytest):
    difference = predicted - actual
    print("{0} {1} {2}".format(difference, predicted, actual))
In [9]:
# Mean accuracy of the fitted classifier on both splits.
# The training score alone overstates how well the model generalises, so the
# held-out score is reported next to it. Targets are passed 1-D, which is the
# shape scikit-learn expects for y.
train_accuracy = linear.score(pd.DataFrame(xtrain), ytrain)
test_accuracy = linear.score(pd.DataFrame(xtest), ytest)

# Displayed as (training accuracy, held-out accuracy).
train_accuracy, test_accuracy
Out[9]:
In [ ]: