In [1]:
from sklearn import linear_model
import matplotlib.pyplot as plt
import csv
import numpy
In [2]:
#Load Data
def load_data(filename, train_size=90):
    """Read a comma-separated file and split its rows into train/test arrays.

    The first row of the file is treated as a header and skipped. Blank rows
    are ignored. The first ``train_size`` data rows become training examples
    (first three columns, converted to float); every remaining row becomes a
    test point (first two columns only, converted to float).

    Args:
        filename: Path of the CSV file to read.
        train_size: Number of leading data rows placed in the training set.
            Defaults to 90, the split this notebook originally hard-coded.

    Returns:
        A ``(training_data, testing_data)`` tuple of numpy arrays.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a selected cell cannot be converted to float.
    """
    training_data = []
    testing_data = []
    # The context manager guarantees the handle is closed; newline="" is the
    # mode the csv module documents for files passed to csv.reader.
    with open(filename, "r", newline="") as csv_file:
        rows = csv.reader(csv_file, delimiter=",")
        next(rows, None)  # Skip the header row (no-op on an empty file).
        # csv.reader yields [] for blank lines; drop them so they neither
        # consume a training slot nor produce a ragged array.
        data_rows = (row for row in rows if row)
        for count, training_example in enumerate(data_rows):
            if count < train_size:
                training_data.append([float(feature) for feature in training_example[:3]])
            else:
                testing_data.append([float(feature) for feature in training_example[:2]])
    return numpy.array(training_data), numpy.array(testing_data)
In [3]:
def logistic_regression_model(training):
    """Fit a scikit-learn LogisticRegression on the given training array.

    The first two columns of ``training`` are used as features and the third
    column as the target. Both are printed before fitting.

    Args:
        training: 2-D numpy array with at least three columns.

    Returns:
        The fitted ``linear_model.LogisticRegression`` instance.
    """
    # Features: every row, first two columns.
    feature_matrix = training[:, :2]
    print(feature_matrix)

    # Targets: third column, viewed as a column vector and flattened back to 1-D.
    target_column = training[:, 2]
    target_vector = target_column[:, numpy.newaxis].ravel()
    print(target_vector)

    # Fit a logistic model with the library's default settings.
    classifier = linear_model.LogisticRegression()
    classifier.fit(feature_matrix, target_vector)
    return classifier
In [4]:
if __name__ == "__main__":
    # load_data returns a (training, testing) pair of arrays.
    input_data = load_data("ex2data1-logistic.csv")
    training_data = input_data[0]
    to_be_predicted = input_data[1]

    # Estimate using logistic model
    logistic_model = logistic_regression_model(training_data)
    logistic_model_output = logistic_model.predict(to_be_predicted)
    for point in range(len(to_be_predicted)):
        print(str(to_be_predicted[point]) + " : " + str(logistic_model_output[point]) + "\n\n")
    print("Logistic Model Statistics \n\nWeights : {} \nBias : {}".format(logistic_model.coef_, logistic_model.intercept_))

    # Plot Data
    # For a binary problem coef_ has shape (1, 2) and intercept_ shape (1,),
    # so `coef_ * x` with a length-100 x cannot broadcast. Unpack the two
    # weights and draw the decision boundary, i.e. the line where
    # w1 * x1 + w2 * x2 + b == 0, solved for x2.
    weight_1, weight_2 = logistic_model.coef_[0]
    bias = logistic_model.intercept_[0]

    # Span the boundary across the observed range of the first feature.
    first_feature = numpy.concatenate((training_data[:, 0], to_be_predicted[:, 0]))
    x = numpy.linspace(first_feature.min(), first_feature.max(), 100)

    plt.ylim(0.0, 100.0)
    if weight_2 != 0:
        plt.plot(x, -(weight_1 * x + bias) / weight_2)
    else:
        # Boundary does not depend on the second feature: it is a vertical line.
        # A zero weight_1 as well would mean there is no boundary to draw.
        if weight_1 != 0:
            plt.axvline(-bias / weight_1)
    plt.scatter(to_be_predicted[:, 0], to_be_predicted[:, 1], color="r")
    plt.scatter(training_data[:, 0], training_data[:, 1], color="b")
    plt.show()
In [ ]: