In [2]:
from matplotlib import pyplot as plt
# Getting the data from the file (not the most efficient way; pandas DataFrames would be better)
import numpy
from sklearn.linear_model import LinearRegression
# Getting the data from the sampledata csv file
def data_from_file():
    with open("sampledata.csv", "r") as filedata:
        return [line.strip() for line in filedata.readlines()[1:]] # Without the column header row


#print([line.split(",")[0] for line in data_from_file()])
#exit(0)
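# A cleaner alternative would be pandas (a sketch only, assuming sampledata.csv
# has an instance-number column followed by diameter and price columns):
# import pandas as pd
# df = pd.read_csv("sampledata.csv")
# X = df.iloc[:, 1].values.reshape(-1, 1).tolist()  # diameter column as a list of single-element lists
# Y = df.iloc[:, 2].values.reshape(-1, 1).tolist()  # price column as a list of single-element lists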
def get_X():
#     new_x  = []
#     for element in numpy.array([line.split(",")[1] for line in data_from_file()]): 
#         new_x.append(numpy.array(element))
    return  [[6],[8],[10],[14],[18]] 

def get_Y():
#     new_Y = []
#     for element in numpy.array(numpy.array([line.split(",")[1] for line in data_from_file()]):
#         new_Y.append(numpy.array(element))
                               
    return [[7],[9],[13],[17.5],[18]]

def visualize_given_data():
    instance_number = numpy.array([line.split(",")[0] for line in data_from_file()])
    X = get_X()  # The explanatory variable
    Y = get_Y()  # The response variable
    #print(type(X), type(Y)) # Just printing the types of the variables

    plt.figure() # Initializes the figure
    plt.title("PIZZA PRICE AGAINST DIAMETER ")
    plt.xlabel('DIAMETER IN INCHES ') # setting the x label
    plt.ylabel('Price in dollars') # setting the y label
    plt.plot(X, Y, 'k.') # Plots the explanatory variable against the response variable as black dots ('k.')
    plt.axis([0, 25, 0, 25])
    plt.grid(True) # Displays the grid on the graph
    plt.show() # Renders the plot

def apply_linear_regression():
    X = get_X()
    Y = get_Y() # getting the Y and the X values
    model = LinearRegression()
    model.fit(X,Y) # Fits the model on the explanatory and response variables
    return model
# def visualize_sample_data(plt, X, Y):
#     instance_number = numpy.array([line.split(",")[0] for line in data_from_file()])
#     #X = get_X()  # The explanatory variable
#     #Y = get_Y()  # The response variable
#     #print(type(X), type(Y)) # Just printing the types of the variables

#     plt.figure() # Initializes the figure
#     plt.title("PIZZA PRICE AGAINST DIAMETER ")
#     plt.xlabel('DIAMETER IN INCHES ') # setting the x label
#     plt.ylabel('Price in dollars') # setting the y label
#     plt.plot(X, Y, 'k.') # Plots the explanatory variable against the response variable as black dots ('k.')
#     plt.axis([0, 25, 0, 25])
#     plt.grid(True) # Displays the grid on the graph
#     return plt

In [3]:
visualize_given_data()



In [4]:
print(get_X(),get_Y())


([[6], [8], [10], [14], [18]], [[7], [9], [13], [17.5], [18]])

In [5]:
model = apply_linear_regression()

In [6]:
model.predict([[12]])[0][0] # Single prediction for a 12-inch diameter (result not printed here)
X_input = []
Y_output = []
for i in range(1,10):
    print("Prediction for the value : ", i)
    X_input.append(i)
    print("The Predicted Value from the model is : ",model.predict([[i]])[0])
    Y_output.append(model.predict([[i]])[0][0]) # Collecting the predicted prices


('Prediction for the value : ', 1)
('The Predicted Value from the model is : ', array([ 2.94181034]))
('Prediction for the value : ', 2)
('The Predicted Value from the model is : ', array([ 3.91810345]))
('Prediction for the value : ', 3)
('The Predicted Value from the model is : ', array([ 4.89439655]))
('Prediction for the value : ', 4)
('The Predicted Value from the model is : ', array([ 5.87068966]))
('Prediction for the value : ', 5)
('The Predicted Value from the model is : ', array([ 6.84698276]))
('Prediction for the value : ', 6)
('The Predicted Value from the model is : ', array([ 7.82327586]))
('Prediction for the value : ', 7)
('The Predicted Value from the model is : ', array([ 8.79956897]))
('Prediction for the value : ', 8)
('The Predicted Value from the model is : ', array([ 9.77586207]))
('Prediction for the value : ', 9)
('The Predicted Value from the model is : ', array([ 10.75215517]))
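The X_input and Y_output lists collected above are not used again later; a minimal sketch of plotting them against the training points (assuming the cells above have been run):

plt.plot(X_input, Y_output, 'r-', label="Model predictions")  # predicted prices for diameters 1-9
plt.plot(get_X(), get_Y(), 'k.', label="Training data")       # original training points
plt.legend()
plt.show()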

In [7]:
## THE LINEAR REGRESSION MODEL IS AN ESTIMATOR HERE, SINCE IT ESTIMATES THE PRICE OF THE PIZZA
## GETTING THE RESIDUAL SUM OF SQUARES FOR OUR MODEL

Residual sum of squares


In [8]:
import numpy as np # Importing the numpy module 
X = numpy.array(get_X())
Y = numpy.array(get_Y())
print("Residual Sum of squares : %.2f"%np.mean((model.predict(X)-Y)**2))
# This gets the residual sum of squares


Residual Sum of squares : 1.75

In [9]:
from __future__ import division # Enables Python 3 style true division
print(2/3) # Prints a float result even when both operands are integers


0.666666666667

# MEAN OF A NUMPY ARRAY


In [11]:
mean_x  = np.mean(X)
print(mean_x) # This prints the mean value


11.2

VARIANCE OF X: sum of (X - mean_X)^2 divided by (n - 1)


In [12]:
# Calculating the variance of X manually
K = [(x-mean_x)**2 for x in X] # List of squared deviations (x - mean_x)^2 used in the variance calculation
print(K)
variance = sum(K)/(len(X)-1)


[array([ 27.04]), array([ 10.24]), array([ 1.44]), array([ 7.84]), array([ 46.24])]

In [13]:
print(variance) # Prints the variance of X
var_x = variance


[ 23.2]

CALCULATING THE VARIANCE DIRECTLY WITH NUMPY


In [14]:
# Alternative way of calculating the variance
print("The variance of x is : ",np.var(X,ddof = 1)) # ddof=1 applies Bessel's correction for the sample variance


('The variance of x is : ', 23.199999999999999)

CALCULATING THE COVARIANCE MANUALLY: sum of (X - mean_X) * (Y - mean_Y) divided by (n - 1)


In [15]:
# Covariance measures how two variables change together, so we use (x - mean_X) * (y - mean_Y)
# KEEP IN MIND THAT, UNLIKE THE VARIANCE, THE DEVIATIONS ARE NOT SQUARED HERE
mean_X = np.mean(X)
mean_Y = np.mean(Y) # Mean of Y, the response variable
K = [(x - mean_X) for x in X] # Deviations of X from its mean
K_2 = [(y-mean_Y) for y in Y] # Deviations of Y from its mean
Resulted_product  =  [(K[i]*K_2[i]) for i in range(len(X))] # Element-wise product of the deviations
print("The mean of x is ",mean_X) # prints the mean of X
print("The mean of y is :",mean_Y) # prints the mean of Y
print("List of all X - meanX", K)
print("List of all Y - meanY",K_2)
print("Covariance : ", (sum(Resulted_product)/ (len(Resulted_product) - 1))[0])


('The mean of x is ', 11.199999999999999)
('The mean of y is :', 12.9)
('List of all X - meanX', [array([-5.2]), array([-3.2]), array([-1.2]), array([ 2.8]), array([ 6.8])])
('List of all Y - meanY', [array([-5.9]), array([-3.9]), array([ 0.1]), array([ 4.6]), array([ 5.1])])
('Covariance : ', 22.649999999999999)

Using numpy.cov() for the covariance calculation


In [16]:
flat_listX = [float(item[0])  for item in numpy.ndarray.tolist(X)]
flat_listY = [float(item[0]) for item in numpy.ndarray.tolist(Y)]
print("Flattened X list :",flat_listX)
print("Flattened Y list : ",flat_listY)
coV= np.cov(flat_listX,flat_listY) # Both arguments are flat lists; numpy returns the full 2x2 covariance matrix
print("The Covariance is : ",coV)


('Flattened X list :', [6.0, 8.0, 10.0, 14.0, 18.0])
('Flattened Y list : ', [7.0, 9.0, 13.0, 17.5, 18.0])
('The Covariance is : ', array([[ 23.2 ,  22.65],
       [ 22.65,  24.3 ]]))

In [17]:
## BETA = COVARIANCE(X,Y) / VARIANCE(X)      :: X -> explanatory variable, Y -> response variable
## Keep in mind that BETA is the slope in Y = BETA * X + ALPHA (like m in y = m*x + c)

In [18]:
Beta = coV[0] / var_x # First row of the covariance matrix divided by var(X)
print("Beta :",Beta[1]) # Beta[1] is cov(X, Y) / var(X), the slope


('Beta :', 0.9762931034482758)
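Equivalently, the slope can be computed as a single scalar (a sketch using the values from the cells above; coV[0][1] is the covariance of X and Y):

beta_scalar = coV[0][1] / np.var(flat_listX, ddof=1)  # covariance(X, Y) / variance(X)
print("Beta (scalar):", beta_scalar)  # should match the value printed above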

FOR CALCULATING ALPHA: alpha = mean_Y - Beta * mean_X


In [19]:
Alpha = mean_Y - Beta * mean_X
print("ALPHA IS : ", Alpha[1]) # prints the value for the alpha .


('ALPHA IS : ', 1.9655172413793114)

In [20]:
print(1.965517 + 0.97629*18.0) # Manual prediction using alpha and beta
print(model.predict([[18.0]])) # Cross-checking against the fitted model


19.538737
[[ 19.5387931]]

EVALUATING THE MODEL USING R-SQUARED: the coefficient of determination, where a value of 1 indicates a perfect fit; on held-out data it is not simply the square of Pearson's r.


In [21]:
## SS_tot = SUM OVER ALL SAMPLES OF (y - mean_Y)^2
## SS_res = SUM OVER ALL SAMPLES OF (y - f(x))^2    # Here f(x) is the value of y predicted by the linear regression

In [22]:
# ANOTHER DATA SET FOR REGRESSION ! 
diameter_in_inches = np.array([8,9,11,16,12])
observed_price = np.array([11,8.5,15,18,11])
predicted_price = np.array ([9.7759,10.7522,12.7048,17.5863,13.6811])
print(diameter_in_inches)
print(np.mean(observed_price))


[ 8  9 11 16 12]
12.7

In [23]:
# R squared: how well the model's predictions account for the observed values of the response variable.
SS_tot = sum((observed_price - np.mean(observed_price))**2) # SS_total uses the observed values and their mean
SS_res = sum((observed_price - predicted_price)**2) # Sum of squared differences between observed and predicted values
print("SS_TOTAL : ",SS_tot)
print("SS_RESIDUAL :",SS_res)
# R Squared = 1 - SS_res / ss_tot
R = 1 - SS_res/SS_tot
print("R Squared r :", R)


('SS_TOTAL : ', 56.799999999999997)
('SS_RESIDUAL :', 19.198213590000002)
('R Squared r :', 0.66200328186619717)

In [24]:
X_test = diameter_in_inches#[[8],[9],[11],[16],[12]]
Y_test = observed_price #[[11],[8.5],[15],[18],[11]]
print("The value of R square is : ", model.score(np.array(X_test).reshape(-1,1),np.array(Y_test).reshape(-1,1)))


('The value of R square is : ', 0.6620052929422553)

MULTIPLE LINEAR REGRESSION


In [25]:
# FOR MULTIPLE LINEAR REGRESSION, THE PREDICTED VALUE IS:
## Y = ALPHA + BETA1*X1 + BETA2*X2 + ...
# IN GENERAL (MATRIX FORM): Y = X * BETA
# WITH [Y1; Y2; ...; YN] = [1, X11, X12; 1, X21, X22; ...] * [ALPHA; BETA1; BETA2]
# SO, MINIMIZING THE COST FUNCTION GIVES THE NORMAL EQUATION:
#  BETA  =  (X^T   *  X ) ^ -1 * X^T * Y

In [26]:
X = [[1,6,2],[1,8,1],[1,10,0],[1,14,2],[1,18,0]] # Two explanatory features, with a leading column of ones for the intercept
Y_response  = [[7],[9],[13],[17.5],[18]] # Response variable for the explanatory variables above
# APPLYING THE TRANSPOSE TO THE DATA SET

TRANSPOSE OF THE MATRIX


In [27]:
from numpy import dot,transpose # dot and transpose from numpy
transpose_X =  transpose(X) # The data set transposed
print("ORIGINAL DATASET : ",X)
print("Transposed data set",transpose_X)


('ORIGINAL DATASET : ', [[1, 6, 2], [1, 8, 1], [1, 10, 0], [1, 14, 2], [1, 18, 0]])
('Transposed data set', array([[ 1,  1,  1,  1,  1],
       [ 6,  8, 10, 14, 18],
       [ 2,  1,  0,  2,  0]]))

In [28]:
from numpy.linalg import inv # Matrix inverse from numpy.linalg
# THE NORMAL EQUATION GIVES THE VALUE OF BETA:
#  BETA  =  (X^T   *  X ) ^ -1 * X^T * Y
Beta = dot( inv(   dot(transpose_X, X))   , dot(transpose_X,Y_response))
print(Beta) # Prints the value of Beta (intercept and the two coefficients)


[[ 1.1875    ]
 [ 1.01041667]
 [ 0.39583333]]

NUMPY FUNCTION: numpy.linalg.lstsq(X, Y) solves the least-squares problem directly


In [29]:
from numpy.linalg import lstsq
print(lstsq(X,Y_response)[0]) # The first element of the result is the least-squares solution for Beta


[[ 1.1875    ]
 [ 1.01041667]
 [ 0.39583333]]

In [30]:
from matplotlib import pyplot as plt # matplotlib for visualization (already imported above)
model = LinearRegression() # Creating a fresh linear regression model
model.fit(X,Y) # Fitting it on the multi-feature data set
plot1 = plt
plot1.figure()
plot1.plot(X,'k.',label="X TRAINING")
plot1.plot(Y,'k.', label = "Y TRAINING")
plot1.xlabel("EXPLANATORY VARIABLES: X")
plot1.ylabel("RESPONSE VARIABLE: Y")
plot1.title("PIZZA MACHINE LEARNING")
plot1.plot(X,Y)
plot1.legend()
plot1.show()



In [31]:
X = [[6],[8],[10],[14],[18]]
Y = [[7],[9],[13],[17.5],[18]]
X_Test = [[8],[9],[11],[16],[12]]
Y = [[11],[8.5],[15],[18],[11]] # Note: this overwrites the training prices above with the observed test prices
model = LinearRegression()
model.fit(X,Y) # Fitting the single-feature model
Predicted_Y = model.predict(X_Test) # Predicting for the whole test set
for i,predicted in enumerate(Predicted_Y):
    print("PREDICTED : ", predicted," TARGET :",Y[i]) # Comparing each prediction with its observed target
print("R Squared Error : ",model.score(X_Test,Y))
plot11 = plt # Plot for the simple linear regression with one feature
plot11.plot(X,Y,"k.")
plot11.grid(True)
plot11.axis("on") 
plot11.show()


('PREDICTED : ', array([ 11.9137931]), ' TARGET :', [11])
('PREDICTED : ', array([ 12.15948276]), ' TARGET :', [8.5])
('PREDICTED : ', array([ 12.65086207]), ' TARGET :', [15])
('PREDICTED : ', array([ 13.87931034]), ' TARGET :', [18])
('PREDICTED : ', array([ 12.89655172]), ' TARGET :', [11])
('R Squared Error : ', 0.29010083464520775)

# MULTIPLE LINEAR REGRESSION WITH TWO FEATURES


In [72]:
plot2 = plt
plot3 = plt # For the third plot 
X = [[6,2],[8,1],[10,0],[14,2],[18,0]] # Training data with two explanatory features
Y = [[7],[9],[13],[17.5],[18]]
plot2.plot(X,Y,"k.")
plot2.plot(X,Y,label="Training X - Y")
model = LinearRegression() # Creating the linear regression model
model.fit(X,Y)  # Fitting our training data to the model
X_Test = [[8,2],[9,0],[11,2],[16,2],[12,0]] # Our test data set, used to check the prediction error
Y_Test = [[11],[8.5],[15],[18],[11]]
# PREDICTION ON THE TEST SET

plot3.plot(X_Test,Y_Test,'k.')
plot3.plot(X_Test,Y_Test)
prediction_X_Test = model.predict(X_Test) # Predicted Y values for all the test values of X
for count, prediction in enumerate(prediction_X_Test):
    print ("Predicted: %s  Target : %s"%(prediction, Y_Test[count])) # Printing the predicted and the expected values
plot2.plot(X_Test,prediction_X_Test,label = "Predicted X") # Plotting the predicted values
plot2.plot(prediction_X_Test,label = "Predicted Y") # Plotting the predicted values


# handles, labels = plot2.get_legend_handles_labels()
# lgd = plot2.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5,-0.1))
plot2.grid(True)
plot2.legend() # Calls in the legends
plot2.show()#  showing the plot !
plot3.grid(True)
#plot3.legend()
plot3.show()
print("R Sq")
print("R Squared Value IS : ",model.score(X_Test,Y_Test))


Predicted: [ 10.0625]  Target : [11]
Predicted: [ 10.28125]  Target : [8.5]
Predicted: [ 13.09375]  Target : [15]
Predicted: [ 18.14583333]  Target : [18]
Predicted: [ 13.3125]  Target : [11]
R Sq
('R Squared Value IS : ', 0.77016777313184681)

POLYNOMIAL REGRESSION


In [73]:
# The quadratic model adds a squared term: Y = Alpha + Beta1*X + Beta2*X^2

In [74]:
# Here we add a third term, Beta2*X^2: the original x values stay the same,
# and their squares are added as an extra feature
import numpy as np # Numpy conventions 
from matplotlib import pyplot as plt # For visualization
from sklearn.linear_model import LinearRegression # Linear regression model
from sklearn.preprocessing import PolynomialFeatures # Generates polynomial features from the input

In [75]:
# DATA SETS
X_train = [[6],[8],[10],[14],[18]]
Y_train = [[7],[9],[13],[17.5],[18]]
X_test = [[6],[8],[11],[16]]
Y_test = [[8],[12],[15],[18]]
regressor = LinearRegression() # Linear Regression model
regressor.fit(X_train,Y_train) # Fitting the model for the regressor
# Creating 100 evenly spaced sample points between 0 and 26
xx =np.linspace(0, 26, 100) # Evenly spaced points for plotting the fitted line
# print(xx)  Checking the output generated for xx
# xx.reshape(xx.shape[0],1) reshapes the 1-D array into a 2-D column (one feature per row)
yy = regressor.predict(xx.reshape(xx.shape[0],1)) # Predicts the values for the xx linspace 
plt.plot(xx,yy) # Plotting the graph
plt.show() # Displays the plot



In [76]:
from sklearn.preprocessing import PolynomialFeatures # Generates polynomial features from the input
quadratic_featurizer =  PolynomialFeatures(degree = 2) # Polynomial feature transformer of the given degree
# degree=2 adds the X^2 term (degree=3 would also add X^3, and so on)
X_train_quadratic = quadratic_featurizer.fit_transform(X_train) # fit_transform first fits the transformer and then transforms the data
# (the transformation is applied after the feature transformer has been fitted)
X_test_quadratic = quadratic_featurizer.transform(X_test) # transform() only applies the already-fitted transformation to X_test (row by row)
#print(X_train_quadratic)
#print(X_test_quadratic)

In [77]:
# LINEAR MODEL --> FITTING THE MODEL ON X_TRAIN (QUADRATIC): the training set with the x^2 quadratic feature
regressor_quadratic = LinearRegression() # Creating the linear regression model
regressor_quadratic.fit(X_train_quadratic, Y_train) # Fitting the linear regression on the polynomial features and the training targets
xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0],1)) # Transforms the sample points to polynomial features

#print(xx_quadratic ) # Inspect the output if needed: the 100 reshaped sample points transformed to polynomial features

In [78]:
plt.plot(xx,regressor_quadratic.predict(xx_quadratic), c = 'r', linestyle = '--',label = "Quadratic Regression")  # linestyle sets the dash style
# c sets the colour of the line
plt.plot(xx,yy, label = "Simple Linear Regression")
plt.title("PIZZA PRICE REGRESSED ON BASIS OF DIAMETER")
plt.xlabel("DIAMETER IN INCHES ")
plt.ylabel("PRICE IN DOLLAR")
plt.axis([0,25,0,25])
plt.grid(True)
plt.scatter(X_train,Y_train)
plt.plot(X_train,Y_train,label = "Train data line")
plt.legend() # Showing the legends of the data
plt.show() # Showing the Plotted data at the end



In [79]:
print(X_train) # The 5 training samples
print(X_train_quadratic) # 5 x 3 matrix: [1, x, x^2] per row
print(X_test) # The 4 test samples
print(X_test_quadratic) # Quadratic-transformed X test
print("Simple Linear Regression R-Squared : ",regressor.score(X_test, Y_test )) # Score of the simple linear model
print("Quadratic Regression R- Squared :",regressor_quadratic.score(X_test_quadratic, Y_test)) # Score of the quadratic model


[[6], [8], [10], [14], [18]]
[[   1.    6.   36.]
 [   1.    8.   64.]
 [   1.   10.  100.]
 [   1.   14.  196.]
 [   1.   18.  324.]]
[[6], [8], [11], [16]]
[[   1.    6.   36.]
 [   1.    8.   64.]
 [   1.   11.  121.]
 [   1.   16.  256.]]
('Simple Linear Regression R-Squared : ', 0.80972679770766498)
('Quadratic Regression R- Squared :', 0.86754436563450543)

In [92]:
## PLOTTING WITH A CUBIC POLYNOMIAL FEATURE
# Cubic_feature = PolynomialFeatures(degree = 3) # Third-degree polynomial features
# xxx_train = Cubic_feature.fit_transform(X_train) # Transform the training data into cubic features
# xxx_test = Cubic_feature.transform(X_test) # Transform the test data the same way
# regressor_cubic = LinearRegression() # Linear regression on the cubic features
# regressor_cubic.fit(xxx_train,Y_train) # Fit the cubic model on the transformed training data
# print(regressor_cubic.predict(xxx_train)) # Predictions on the training set
# xx_cubic = Cubic_feature.transform(xx.reshape(xx.shape[0],1)) # Transform the sample points from the earlier cells
# plt.plot(xx,regressor_cubic.predict(xx_cubic),label="cubic polynomial (degree 3)")
# plt.show()

In [ ]: