notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.linear_model import SGDClassifier
from sklearn import linear_model
from sklearn import svm



In [2]:

    
linear = svm.SVC()
df = pd.read_csv("obama_data.csv", header=0)



In [4]:

    
xtrain, xtest, ytrain, ytest = train_test_split(df['Unemployment Rate'], df['Obama Approval Rating'], train_size = 0.8)



In [5]:

    
linear.fit(pd.DataFrame(xtrain), pd.DataFrame(ytrain))









    



/Users/apprentice/anaconda/lib/python2.7/site-packages/sklearn/svm/base.py:472: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y_ = column_or_1d(y, warn=True)






    Out[5]:





SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)



In [6]:

    
output = linear.predict(pd.DataFrame(xtest))



In [7]:

    
ytest = ytest.reset_index()["Obama Approval Rating"]



In [8]:

    
for i in range (0, len(output)):
    difference = output[i] - ytest[i]
    print difference, output[i], ytest[i]



In [9]:

    
linear.score(pd.DataFrame(xtrain), pd.DataFrame(ytrain))









    Out[9]:





0.25396825396825395



In [ ]: