Linear Regression using SciKit Learn

Date Created: 28/03/2017 Author: Nilesh Chaturvedi

This is my first .pynb and this code is for regression.


In [10]:
#import necessary libraries
from sklearn import linear_model
from sklearn.preprocessing import normalize, PolynomialFeatures
import matplotlib.pyplot as plt
import csv
import numpy

In [11]:
#Load Data
def load_data(filename):
    
    file_data = csv.reader(open(filename, "r"), delimiter = ",")
    training_data = []
    testing_data = []
    for training_example in list(file_data)[2:]:
        if training_example[5]!="NaN":
            training_data.append([float(feature) for feature in training_example[:6]])
        else:
            testing_data.append([float(feature) for feature in training_example[:5]])
            
    return numpy.array(training_data), numpy.array(testing_data)

In [12]:
def linear_regression_model(training):
    
    # Extract the features from training data.
    training_x = training[:,:5]
    
    # Extract values corresponding to every training example.
    training_y = (training[:,5])[:,numpy.newaxis]
    
    
    #normalize the data
    normalized_x = normalize(training_x, norm='l1', axis=0)
    normalized_y = normalize(training_y, norm='l1', axis=0)
    
    #Fit a linear model to the training data.
    linear = linear_model.LinearRegression()
    linear.fit(normalized_x, normalized_y)
    
    return linear

In [13]:
if __name__ == "__main__":
   
    input_data = load_data("data_carsmall.csv")
    training_data = input_data[0]
    to_be_predicted = input_data[1]
    #normalize(input_data[1], norm = 'l1', axis = 0)
    
    # Estimate using linear model
    linear_model = linear_regression_model(training_data)
    linear_model_output = linear_model.predict(to_be_predicted)

    for point in range(len(to_be_predicted)):
        print(str(to_be_predicted[point]) + " : " + str(linear_model_output[point]) + "\n\n")
    
    print("Linear Model Statistics \n\nWeights : {} \nBias : {}".format(linear_model.coef_, linear_model.intercept_))
    
    # Plot Data
    plt.scatter(to_be_predicted[:,0], linear_model_output, label = "Feature 1")
    plt.scatter(to_be_predicted[:,1], linear_model_output, label = "Feature 2")
    plt.scatter(to_be_predicted[:,2], linear_model_output, label = "Feature 3")
    plt.scatter(to_be_predicted[:,3], linear_model_output, label = "Feature 4")
    plt.scatter(to_be_predicted[:,4], linear_model_output, label = "Feature 5")
    plt.show()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-f9942c613d59> in <module>()
     10 
     11     for point in range(len(training_data)):
---> 12         print(str(to_be_predicted[point]) + " : " + str(linear_model_output[point]) + "\n\n")
     13 
     14     print("Linear Model Statistics \n\nWeights : {} \nBias : {}".format(linear_model.coef_, linear_model.intercept_))

NameError: name 'to_be_predicted' is not defined

In [ ]: