Date Created: 28/03/2017
Author: Nilesh Chaturvedi
In [1]:
#import necessary libraries
from sklearn import linear_model
from sklearn.preprocessing import normalize, PolynomialFeatures
import matplotlib.pyplot as plt
import csv
import numpy
In [2]:
#Load Data
def load_data(filename):
    """Read the dataset and split it into labelled and unlabelled rows.

    The first two rows yielded by the CSV reader are skipped (they are
    treated as headers). Every remaining non-empty row is classified by its
    sixth column: rows where it is the literal string ``"NaN"`` have no
    target value and become testing rows; all other rows become training
    rows.

    Args:
        filename: Path of the comma-separated file to read.

    Returns:
        A tuple ``(training, testing)`` of numpy arrays. ``training`` holds
        the first six columns of each labelled row as floats (five features
        followed by the target); ``testing`` holds the first five columns of
        each unlabelled row as floats.

    Raises:
        ValueError: If a retained field cannot be converted to ``float``.
        IndexError: If a non-empty data row has fewer than six columns.
    """
    training_data = []
    testing_data = []
    # The context manager guarantees the handle is closed even if parsing
    # fails; newline="" is the mode the csv module documents for readers.
    with open(filename, "r", newline="") as csv_file:
        reader = csv.reader(csv_file, delimiter=",")
        for row_number, row in enumerate(reader):
            # Skip the two header rows, and blank lines (which the reader
            # yields as empty lists and would otherwise fail on row[5]).
            if row_number < 2 or not row:
                continue
            if row[5] != "NaN":
                training_data.append([float(value) for value in row[:6]])
            else:
                testing_data.append([float(value) for value in row[:5]])
    return numpy.array(training_data), numpy.array(testing_data)
In [3]:
def polynomial_regression_model(training):
    """Fit a degree-2 polynomial regression to the labelled rows.

    Columns 0-4 of ``training`` are taken as the features and column 5 as
    the target. Both are L1-normalized column-wise, the features are expanded
    with ``PolynomialFeatures(degree=2)``, and a ``LinearRegression`` model
    is fitted to the expanded features against the normalized target.

    Args:
        training: Two-dimensional numpy array with at least six columns.

    Returns:
        The fitted ``LinearRegression`` estimator. Note that it predicts the
        L1-normalized target, not the target in its original units.
    """
    features = training[:, :5]
    # Keep the target as a single column (n_samples, 1) so that it can be
    # normalized along axis 0 exactly like the feature columns.
    targets = training[:, 5].reshape(-1, 1)

    scaled_features = normalize(features, norm='l1', axis=0)
    scaled_targets = normalize(targets, norm='l1', axis=0)

    # Quadratic expansion of the normalized features.
    expander = PolynomialFeatures(degree=2)
    quadratic_features = expander.fit_transform(scaled_features)

    regressor = linear_model.LinearRegression()
    regressor.fit(quadratic_features, scaled_targets)
    return regressor
In [4]:
if __name__ == "__main__":
    # Script entry point: fit the polynomial model on the labelled rows,
    # estimate the target for the unlabelled rows, then print and plot them.
    training_data, testing_data = load_data("data_carsmall.csv")

    # Estimate using polynomial model
    polynomial_model = polynomial_regression_model(training_data)

    # The model is trained on features and targets divided by the column-wise
    # L1 norms (sum of absolute values) of the TRAINING set. The test rows
    # must be divided by those same norms; normalizing them by their own
    # column sums would put them on a different scale from the one the model
    # was fitted on.
    feature_norms = numpy.abs(training_data[:, :5]).sum(axis=0)
    feature_norms[feature_norms == 0] = 1.0  # avoid dividing by zero for all-zero columns
    target_norm = numpy.abs(training_data[:, 5]).sum() or 1.0
    normalized_test = testing_data / feature_norms

    feature_transform = PolynomialFeatures(degree=2)
    to_be_predicted = feature_transform.fit_transform(normalized_test)

    # The model outputs the normalized target; multiply by the training
    # target norm to report estimates in the original units.
    polynomial_model_output = polynomial_model.predict(to_be_predicted) * target_norm

    for expanded_row, estimate in zip(to_be_predicted, polynomial_model_output):
        print(str(expanded_row) + " : " + str(estimate) + "\n\n")
    print("Polynomial Model Statistics \n\nWeights : {} \nBias : {}".format(polynomial_model.coef_, polynomial_model.intercept_))

    # Plot each (normalized) input feature against the estimates. The
    # expanded matrix is not used here because its first column is the
    # constant bias term added by PolynomialFeatures, not a feature.
    estimates = polynomial_model_output.ravel()
    for column in range(normalized_test.shape[1]):
        plt.scatter(normalized_test[:, column], estimates, label="Feature {}".format(column + 1))
    plt.legend()  # without this call the labels above are never displayed
    plt.show()