In [457]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.linear_model import SGDClassifier
from sklearn import linear_model
from sklearn import svm
In [458]:
# Load the dataset (first CSV row is the header), then create the estimator.
df = pd.read_csv("FINALOBAMA_minus_first_six.csv", header=0)
# NOTE(review): despite the name `linear`, svm.SVC() is constructed with its
# default kernel (RBF per the scikit-learn docs), not a linear one -- confirm intent.
linear = svm.SVC()
In [459]:
# Split 'Close' (feature) and 'Approval' (target) into train/test parts, with
# 80% of the rows used for training. The fixed random_state makes the split,
# and therefore every downstream prediction, score and plot, reproducible
# across "Restart & Run All".
xtrain, xtest, ytrain, ytest = train_test_split(
    df['Close'], df['Approval'], train_size=0.8, random_state=42
)
In [460]:
# Fit the SVC on the single 'Close' feature. X has to be 2-D, hence the
# DataFrame wrapper. y is passed as the 1-D Series: scikit-learn expects a 1-D
# target and emits a DataConversionWarning when given a column-vector y.
linear.fit(pd.DataFrame(xtrain), ytrain)
Out[460]:
In [461]:
# Predict with the fitted SVC on the test-split feature values, supplied as a
# one-column frame.
output = linear.predict(xtest.to_frame())
In [462]:
# Renumber the test targets 0..n-1 (dropping the row labels carried over from
# the split) so they can be looked up by position next to `output`.
ytest = ytest.reset_index(drop=True)
In [463]:
# Display the test-split 'Close' values that were fed to linear.predict.
xtest
Out[463]:
In [464]:
# For every test row print: prediction error, predicted value, actual value.
# zip pairs the values by position, so the loop does not depend on ytest's
# index labels.
for predicted, actual in zip(output, ytest):
    difference = predicted - actual
    # %-formatting yields the same space-separated line on Python 2 and 3.
    print("%s %s %s" % (difference, predicted, actual))
In [ ]:
In [465]:
# Score the fitted SVC on the training split.
# NOTE(review): this evaluates on the data the model was fitted on, not on
# xtest/ytest -- confirm that a training-set score is what is wanted here.
linear.score(X=pd.DataFrame(xtrain), y=pd.DataFrame(ytrain))
Out[465]:
In [466]:
# Rising scenario: ten input values from 1950 up to 2850 in steps of 100.
predictive_data_high = list(range(1950, 2950, 100))
In [467]:
# Flat scenario: the input value stays at 1950 for all ten steps.
predictive_data_same = [1950] * 10
In [468]:
# Falling scenario: ten input values from 1950 down to 1050 in steps of 100.
predictive_data_low = list(range(1950, 950, -100))
In [469]:
# SVC predictions for the rising scenario (list wrapped as a one-column frame).
output_high = linear.predict(X=pd.DataFrame(data=predictive_data_high))
In [470]:
# Display the SVC predictions for the rising scenario.
output_high
Out[470]:
In [471]:
# SVC predictions for the flat scenario (list wrapped as a one-column frame).
output_medium = linear.predict(X=pd.DataFrame(data=predictive_data_same))
In [472]:
# Display the SVC predictions for the flat scenario.
output_medium
Out[472]:
In [473]:
# SVC predictions for the falling scenario (list wrapped as a one-column frame).
output_low = linear.predict(X=pd.DataFrame(data=predictive_data_low))
In [474]:
# Display the SVC predictions for the falling scenario.
output_low
Out[474]:
In [ ]:
In [475]:
# Create an (as yet unfitted) scikit-learn LinearRegression estimator.
regression = linear_model.LinearRegression()
In [476]:
# Fit the linear regression on the training split; both the feature and the
# target are handed over as one-column frames.
regression.fit(X=xtrain.to_frame(), y=ytrain.to_frame())
Out[476]:
In [477]:
# Show the fitted coefficient(s). A single %-formatted string prints the same
# text on Python 2 and 3; print('label', value) would instead print a tuple
# repr (with an escaped "\n") under Python 2's print statement.
print('Coefficients: \n%s' % (regression.coef_,))
In [478]:
# Mean squared error of the regression on the test split. The value is a mean
# of squared errors, so it is labelled as such (it is not a residual *sum*).
# Both sides are flattened to plain 1-D arrays so that np.mean returns a scalar
# instead of a pandas object that "%.2f" would have to coerce implicitly.
test_predictions = np.asarray(regression.predict(pd.DataFrame(xtest))).ravel()
test_targets = np.asarray(ytest).ravel()
print("Mean squared error: %.2f"
      % np.mean((test_predictions - test_targets) ** 2))
In [479]:
# Black line plot of the test targets against the test feature values.
# NOTE(review): points are joined in the order they appear in the split, not
# sorted by x -- confirm a connected line (rather than a scatter) is intended.
plt.plot(xtest.to_frame(), ytest.to_frame(), color="black")
Out[479]:
In [480]:
# Remove the tick marks and labels from both axes of the current figure.
plt.xticks([])
plt.yticks([])
Out[480]:
In [481]:
# Render the current matplotlib figure.
plt.show()
In [482]:
# Black scatter plot of the test targets against the test feature values.
plt.scatter(x=xtest.to_frame(), y=ytest.to_frame(), color="black")
Out[482]:
In [483]:
# Render the scatter figure.
plt.show()
In [ ]:
In [ ]: