In [457]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.linear_model import SGDClassifier
from sklearn import linear_model
from sklearn import svm
In [458]:
# Load the dataset; row 0 of the CSV supplies the column names.
df = pd.read_csv("FINALOBAMA_minus_first_six.csv", header=0)

# Support-vector classifier constructed with no arguments.
# NOTE(review): the variable is called `linear`, yet no kernel argument is
# passed -- confirm that the library's default kernel is what is intended.
linear = svm.SVC()
In [459]:
# Split the rows into training (80%) and evaluation sets: 'Close' is the
# single feature and 'Approval' is the target.  A fixed random_state pins the
# shuffle so every downstream number is reproducible after a kernel restart.
xtrain, xtest, ytrain, ytest = train_test_split(
    df['Close'], df['Approval'], train_size=0.8, random_state=42
)
In [460]:
# Train the classifier.  The feature must be a 2-D matrix (hence the one-column
# DataFrame), but the target is expected to be 1-D: wrapping it in a DataFrame
# hands scikit-learn a column vector and triggers a DataConversionWarning, so
# the target Series is passed as-is.
linear.fit(pd.DataFrame(xtrain), ytrain)
Out[460]:
In [461]:
# Predict a label for every held-out row; the feature Series is widened to a
# one-column frame to form the matrix handed to predict().
output = linear.predict(xtest.to_frame())
In [462]:
# Renumber the held-out targets 0..n-1, discarding the original row labels,
# so that ytest[i] lines up positionally with output[i].
# Note: this rebinds `ytest`; later cells see the renumbered Series.
ytest = ytest.reset_index(drop=True)
In [463]:
# Display the held-out feature values (the 'Close' column of the test split).
xtest
Out[463]:
In [464]:
# For each held-out row print: prediction error, predicted value, actual value.
# Pairing the values positionally (instead of looking up ytest[i]) means the
# loop no longer depends on ytest carrying a 0..n-1 index.
for predicted, actual in zip(output, np.asarray(ytest)):
    difference = predicted - actual
    # One pre-formatted string prints the same space-separated line whether
    # `print` is the Python 2 statement or the Python 3 function.
    print("%s %s %s" % (difference, predicted, actual))
In [ ]:
In [484]:
# Score the fitted classifier against the same rows it was trained on.
# NOTE(review): this is a training-set score; scoring xtest/ytest instead
# would give a held-out estimate -- confirm which one is wanted.
linear.score(xtrain.to_frame(), ytrain.to_frame())
Out[484]:
In [485]:
# Rising scenario: start at 1950, climb in steps of 100 up to 2850, then two
# smaller steps of 50 (2900, 2950) -- twelve values in total.
# Presumably hypothetical 'Close' values; confirm against the training data.
predictive_data_high = list(range(1950, 2851, 100)) + [2900, 2950]
In [486]:
# Flat scenario: the value 1950 repeated twelve times.
predictive_data_same = [1950] * 12
In [487]:
# Falling scenario: start at 1950, drop in steps of 100 down to 1050, then two
# smaller steps of 50 (1000, 950) -- twelve values in total.
predictive_data_low = list(range(1950, 1049, -100)) + [1000, 950]
In [488]:
# Wrap the rising scenario as a one-column feature matrix, then classify it.
scenario_high = pd.DataFrame(predictive_data_high)
output_high = linear.predict(scenario_high)
In [489]:
# Display the classifier's predictions for the rising scenario.
output_high
Out[489]:
In [490]:
# Wrap the flat scenario as a one-column feature matrix, then classify it.
# (The input list is named "..._same" while the result is named "..._medium".)
scenario_same = pd.DataFrame(predictive_data_same)
output_medium = linear.predict(scenario_same)
In [491]:
# Display the classifier's predictions for the flat scenario.
output_medium
Out[491]:
In [492]:
# Wrap the falling scenario as a one-column feature matrix, then classify it.
scenario_low = pd.DataFrame(predictive_data_low)
output_low = linear.predict(scenario_low)
In [493]:
# Display the classifier's predictions for the falling scenario.
output_low
Out[493]:
In [ ]:
In [494]:
# Linear-regression estimator created with its default settings; it is fitted
# on the same train split as the classifier in a following cell.
regression = linear_model.LinearRegression()
In [495]:
# Fit the regression on the training split, feature and target each passed as
# a one-column frame.
# NOTE(review): the target is 2-D here, and the error-metric cell subtracts a
# one-column frame from the predictions -- check shapes there before changing
# the target to 1-D.
regression.fit(xtrain.to_frame(), ytrain.to_frame())
Out[495]:
In [496]:
# Report the fitted slope(s).  Formatting into one string first keeps the
# output the same under both print flavours: with the Python 2 print statement
# a two-item `print(a, b)` would show the repr of a tuple instead.
print("Coefficients: \n %s" % (regression.coef_,))
In [497]:
# Average squared prediction error on the held-out rows.  Because np.mean() is
# applied, the figure is a mean squared error (not a residual *sum* of
# squares), and the message names it accordingly.
# np.ravel flattens both sides so the subtraction is element-wise whether the
# predictions come back 1-D or as a single column.
test_predictions = np.ravel(regression.predict(pd.DataFrame(xtest)))
squared_errors = (test_predictions - np.ravel(ytest)) ** 2
print("Mean squared error: %.2f" % np.mean(squared_errors))
In [498]:
# Draw the held-out (feature, target) pairs as a black line.
# NOTE(review): the points are joined in row order, which is not sorted by
# feature value -- confirm a line (rather than the scatter drawn further down)
# is what is wanted.
plt.plot(xtest.to_frame(), ytest.to_frame(), color="black")
Out[498]:
In [499]:
# Remove the ticks from both axes of the current figure by supplying empty
# tick lists.
plt.xticks([])
plt.yticks([])
Out[499]:
In [500]:
# Render the current matplotlib figure.
plt.show()
In [501]:
# Scatter the held-out (feature, target) pairs as black markers.
plt.scatter(xtest.to_frame(), ytest.to_frame(), color="black")
Out[501]:
In [502]:
# Render the scatter figure.
plt.show()
In [ ]:
In [ ]: