In [2]:
from matplotlib import pyplot as plt
# Getting the data from the file (not the most efficient way; a pandas DataFrame would be cleaner, see the sketch after this cell)
import numpy
from sklearn.linear_model import LinearRegression
# Getting the data from the sampledata csv file
def data_from_file():
    with open("sampledata.csv", "r") as filedata:
        return [line.strip() for line in filedata.readlines()[1:]]  # skipping the header row with the column names
#print([line.split(",")[0] for line in data_from_file()])
#exit(0)
def get_X():
    # new_x = []
    # for element in numpy.array([line.split(",")[1] for line in data_from_file()]):
    #     new_x.append(numpy.array(element))
    return [[6], [8], [10], [14], [18]]
def get_Y():
    # new_Y = []
    # for element in numpy.array([line.split(",")[1] for line in data_from_file()]):
    #     new_Y.append(numpy.array(element))
    return [[7], [9], [13], [17.5], [18]]
def visualize_given_data():
    instance_number = numpy.array([line.split(",")[0] for line in data_from_file()])
    X = get_X()  # the explanatory variable
    Y = get_Y()  # the response variable
    # print(type(X), type(Y))  # just printing the types of the variables
    plt.figure()  # initializes the figure
    plt.title("PIZZA PRICE AGAINST DIAMETER")
    plt.xlabel('DIAMETER IN INCHES')  # setting the x label
    plt.ylabel('Price in dollars')  # setting the y label
    plt.plot(X, Y, 'k.')  # plotting the explanatory variable against the response variable; 'k.' draws black point markers
    plt.axis([0, 25, 0, 25])
    plt.grid(True)  # displays the grid in the graph
    plt.show()  # renders the graph
def apply_linear_regression():
    X = get_X()
    Y = get_Y()  # getting the X and Y values
    model = LinearRegression()
    model.fit(X, Y)  # fits the model on the explanatory and response variables
    return model
# def visualize_sample_data(X, Y, plt=None):
#     instance_number = numpy.array([line.split(",")[0] for line in data_from_file()])
#     # X = get_X()  # the explanatory variable
#     # Y = get_Y()  # the response variable
#     # print(type(X), type(Y))  # just printing the types of the variables
#     plt.figure()  # initializes the figure
#     plt.title("PIZZA PRICE AGAINST DIAMETER")
#     plt.xlabel('DIAMETER IN INCHES')  # setting the x label
#     plt.ylabel('Price in dollars')  # setting the y label
#     plt.plot(X, Y, 'k.')  # plotting the explanatory variable against the response variable; 'k.' draws black point markers
#     plt.axis([0, 25, 0, 25])
#     plt.grid(True)  # displays the grid in the graph
#     return plt
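As noted in the comment at the top of this cell, parsing the CSV by hand is not the cleanest approach. A minimal pandas sketch, assuming sampledata.csv has an index column followed by the diameter and price columns (the exact column layout is an assumption):

import pandas as pd
df = pd.read_csv("sampledata.csv")
X = df.iloc[:, [1]].values  # second column as the explanatory variable (2-D array)
Y = df.iloc[:, [2]].values  # third column as the response variable (2-D array)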
In [3]:
visualize_given_data()
In [4]:
print(get_X(),get_Y())
In [5]:
model = apply_linear_regression()
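To inspect the fitted parameters directly, a small added check using standard scikit-learn attributes (not part of the original cells):

print("Alpha (intercept):", model.intercept_)  # for this data, roughly 1.97
print("Beta (slope):", model.coef_)            # for this data, roughly 0.98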
In [6]:
import random
print(model.predict([[12]])[0][0])  # single prediction for a 12-inch pizza (predict expects a 2-D array)
X_input = []
Y_output = []
for i in range(1, 10):
    print("Prediction for the value : ", i)
    X_input.append(i)
    print("The predicted value from the model is : ", model.predict([[i]])[0])
    Y_output.append(model.predict([[i]])[0][0])
In [7]:
## THE LINEAR REGRESSION IS AN ESTIMATOR HERE, SINCE IT IS ESTIMATING THE PRICE OF THE PIZZA
## GETTING THE RESIDUAL SUM OF SQUARES FOR OUR MODEL
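For reference, with n samples the residual sum of squares is SS_res = sum over i of (y_i - f(x_i))^2, where f(x_i) is the model's prediction. The next cell reports this quantity divided by n, i.e. the mean of the squared residuals.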
In [8]:
import numpy as np # Importing the numpy module
X = numpy.array(get_X())
Y = numpy.array(get_Y())
print("Residual Sum of squares : %.2f"%np.mean((model.predict(X)-Y)**2))
# This Gets the residal sum of squares
In [9]:
from __future__ import division  # on Python 2 this enables Python 3 style true division (a no-op on Python 3)
print(2/3)  # prints a float result even though both operands are integers
In [11]:
mean_x = np.mean(X)
print(mean_x) # This prints the mean value
In [12]:
# For calculating the sample variance of X
K = [(x - mean_x)**2 for x in X]  # squared deviations (x - mean_x)^2, used in the variance calculation
print(K)
variance = sum(K)/(len(X)-1)
In [13]:
print(variance)  # prints the sample variance of X
var_x = variance
In [14]:
# ALTERNATIVE WAY OF CALCULATING THE VARIANCE
print("The variance of x is : ", np.var(X, ddof=1))  # ddof=1 applies Bessel's correction for the sample variance; this is the optimized way
In [15]:
# Covariance measures how two variables change together: (x - mean_X) * (y - mean_Y)
# NOTE: UNLIKE THE VARIANCE, THE DEVIATIONS ARE NOT SQUARED HERE, ONLY MULTIPLIED TOGETHER
mean_X = np.mean(X)
mean_Y = np.mean(Y)  # gets the mean of Y, the response variable
K = [(x - mean_X) for x in X]  # deviations of X from its mean
K_2 = [(y - mean_Y) for y in Y]  # deviations of Y from its mean
Resulted_product = [(K[i] * K_2[i]) for i in range(len(X))]  # element-wise product of the two deviation lists
print("The mean of x is ", mean_X)  # prints the mean of X
print("The mean of y is :", mean_Y)  # prints the mean of Y, for debugging
print("List of all X - meanX", K)
print("List of all Y - meanY", K_2)
print("Covariance : ", (sum(Resulted_product) / (len(Resulted_product) - 1))[0])  # dividing by n - 1 gives the sample covariance
In [16]:
flat_listX = [float(item[0]) for item in numpy.ndarray.tolist(X)]
flat_listY = [float(item[0]) for item in numpy.ndarray.tolist(Y)]
print("Flattened X list :", flat_listX)
print("Flattened Y list :", flat_listY)
coV = np.cov(flat_listX, flat_listY)  # np.cov takes the two flat lists and returns the full 2x2 covariance matrix
print("The covariance matrix is : ", coV)
In [17]:
## BETA = COVARIANCE(X, Y) / VARIANCE(X) :: X -> explanatory variable, Y -> response variable
## Keep in mind that Beta is the slope in Y = Beta * X + Alpha (like m in y = mx + b)
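Spelled out, the two parameters of the fitted line y = alpha + beta * x are beta = Cov(x, y) / Var(x) and alpha = mean(y) - beta * mean(x); the next two cells compute them in that order.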
In [18]:
Beta = coV[0] / var_x  # coV[0] is the row [Var(x), Cov(x, y)], so Beta[1] = Cov(x, y) / Var(x)
print("Beta :", Beta[1])
In [19]:
Alpha = mean_Y - Beta * mean_X  # the intercept
print("ALPHA IS : ", Alpha[1])  # prints the value of alpha
In [20]:
print(1.965517 + 0.97629 * 18.0)  # using the fitted alpha and beta by hand
print(model.predict([[18.0]]))  # cross-checking against the fitted model
In [21]:
## SS_tot = SUM OVER ALL SAMPLES OF (y_value - MEAN_Y)^2
## SS_res = SUM OVER ALL SAMPLES OF (y - f(x_i))^2, where f(x_i) is the value of Y predicted by the linear regression for x_i
In [22]:
# ANOTHER DATA SET FOR REGRESSION !
diameter_in_inches = np.array([8,9,11,16,12])
observed_price = np.array([11,8.5,15,18,11])
predicted_price = np.array ([9.7759,10.7522,12.7048,17.5863,13.6811])
print(diameter_in_inches)
print(np.mean(observed_price))
In [23]:
# R squared: how well the model predicts the observed values of the response variable.
SS_tot = sum((observed_price - np.mean(observed_price))**2)  # SS_total is computed from the observed y values and their mean
SS_res = sum((observed_price - predicted_price)**2)  # sum of squared differences between observed and predicted values
print("SS_TOTAL : ", SS_tot)
print("SS_RESIDUAL :", SS_res)
# R squared = 1 - SS_res / SS_tot
R = 1 - SS_res / SS_tot
print("R squared :", R)
In [24]:
X_test = diameter_in_inches  # [[8],[9],[11],[16],[12]]
Y_test = observed_price  # [[11],[8.5],[15],[18],[11]]
print("The value of R square is : ", model.score(np.array(X_test).reshape(-1,1),np.array(Y_test).reshape(-1,1)))
In [25]:
# FOR MULTIPLE LINEAR REGRESSION, KEEP IN MIND THAT THE PREDICTED VALUE IS:
## Y = ALPHA + BETA1 * X1 + BETA2 * X2 + ...
# IN GENERAL (MATRIX FORM): Y = X * BETA
# WHERE [y1; y2; ...; yn] = [1, x11, x12; 1, x21, x22; ...] * [ALPHA; BETA1; BETA2]
# SO, TO MINIMIZE THE COST FUNCTION:
# BETA = (X^T * X)^-1 * X^T * Y
In [26]:
X = [[1,6,2],[1,8,1],[1,10,0],[1,14,2],[1,18,0]]  # two explanatory variables per sample, plus a leading 1 for the intercept
Y_response = [[7],[9],[13],[17.5],[18]]  # response variable for the specified explanatory variables
# APPLYING THE TRANSPOSE TO THE DATA SET
In [27]:
from numpy import dot, transpose  # dot product and transpose helpers from numpy
transpose_X = transpose(X)  # the data set transposed
print("ORIGINAL DATASET : ",X)
print("Transposed data set",transpose_X)
In [28]:
from numpy.linalg import inv  # inverse of a matrix
# THE VALUE OF BETA FROM THE NORMAL EQUATION IS:
# BETA = (X^T * X)^-1 * X^T * Y
Beta = dot(inv(dot(transpose_X, X)), dot(transpose_X, Y_response))
print(Beta)  # prints the value of beta
In [29]:
from numpy.linalg import lstsq
print(lstsq(X, Y_response, rcond=None)[0])  # prints the least-squares solution for beta
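A quick added sanity check, assuming the Beta array from the normal-equation cell above is still in scope: the two solutions should coincide.

print(np.allclose(Beta, lstsq(X, Y_response, rcond=None)[0]))  # expected output: True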
In [30]:
from matplotlib import pyplot as plt  # plotting library (already imported above)
model = LinearRegression()  # creating a fresh linear regression model
model.fit(X, Y)  # fitting the model on the multi-feature data set
plot1 = plt
plot1.figure()
plot1.plot(X, 'k.', label="X TRAINING")
plot1.plot(Y, 'k.', label="Y TRAINING")
plot1.xlabel("EXPLANATORY VARIABLE: X")
plot1.ylabel("RESPONSE VARIABLE: Y")
plot1.title("PIZZA MACHINE LEARNING")
plot1.plot(X,Y)
plot1.legend()
plot1.show()
In [31]:
X = [[6],[8],[10],[14],[18]]
Y = [[7],[9],[13],[17.5],[18]]
X_Test = [[8],[9],[11],[16],[12]]
Y_Test = [[11],[8.5],[15],[18],[11]]
model = LinearRegression()
model.fit(X, Y)  # fitting the single-feature model on the training data
Predicted_Y = model.predict(X_Test)  # predicting for the complete test set
for i, predicted in enumerate(Predicted_Y):
    print("PREDICTED : ", predicted, " TARGET :", Y_Test[i])
print("R squared error : ", model.score(X_Test, Y_Test))
plot11 = plt  # plotting the simple linear regression with one feature
plot11.plot(X, Y, "k.")
plot11.grid(True)
plot11.axis("on")
plot11.show()
In [72]:
plot2 = plt
plot3 = plt # For the third plot
X = [[6,2],[8,1],[10,0],[14,2],[18,0]] # X axis data set
Y = [[7],[9],[13],[17.5],[18]]
plot2.plot(X,Y,"k.")
plot2.plot(X,Y,label="Test X - Y ")
model = LinearRegression()  # creating the linear regression model
model.fit(X, Y)  # fitting our training data to the model
X_Test = [[8,2],[9,0],[11,2],[16,2],[12,0]]  # our test data set, used to check the least-squares error the model makes
Y_Test = [[11],[8.5],[15],[18],[11]]
# PREDICTION ON THE TEST SET
plot3.plot(X_Test, Y_Test, 'k.')
plot3.plot(X_Test, Y_Test)
prediction_X_Test = model.predict(X_Test)  # gets the predicted Y values for all the test values of X
for count, prediction in enumerate(prediction_X_Test):
    print("Predicted: %s Target : %s" % (prediction, Y_Test[count]))  # printing the predicted and expected values
plot2.plot(X_Test, prediction_X_Test, label="Predicted X")  # plotting the predicted values
plot2.plot(prediction_X_Test, label="Predicted Y")  # plotting the predicted values
# handles, labels = plot2.get_legend_handles_labels()
# lgd = plot2.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5,-0.1))
plot2.grid(True)
plot2.legend() # Calls in the legends
plot2.show()# showing the plot !
plot3.grid(True)
#plot3.legend()
plot3.show()
print("R Sq")
print("R Squared Value IS : ",model.score(X_Test,Y_Test))
In [73]:
# The quadratic model adds a squared term: Y = Alpha + Beta1 * X + Beta2 * X^2
In [74]:
# The model now has a third term, Beta2 * X^2: the same diameter values are used, but their squares are added as an extra feature.
import numpy as np  # numpy conventions
from matplotlib import pyplot as plt  # for visualization
from sklearn.linear_model import LinearRegression  # linear regression model
from sklearn.preprocessing import PolynomialFeatures  # builds polynomial features from the given feature
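A minimal illustration of what the featurizer does (an added example, not an original cell): degree=2 expands a single diameter value x into the row [1, x, x^2].

print(PolynomialFeatures(degree=2).fit_transform([[6]]))  # -> [[ 1.  6. 36.]]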
In [75]:
# DATA SETS
X_train = [[6],[8],[10],[14],[18]]
Y_train = [[7],[9],[13],[17.5],[18]]
X_test = [[6],[8],[11],[16]]
Y_test = [[8],[12],[15],[18]]
regressor = LinearRegression() # Linear Regression model
regressor.fit(X_train,Y_train) # Fitting the model for the regressor
# Creating a grid of 100 evenly spaced points from 0 to 26 for plotting the fitted line
xx = np.linspace(0, 26, 100)
# print(xx)  # checking the generated grid
# xx.reshape(xx.shape[0], 1) converts the 1-D array into a 2-D column (one feature per row)
yy = regressor.predict(xx.reshape(xx.shape[0], 1))  # predicts the values for the xx grid
plt.plot(xx, yy)  # plotting the graph
plt.show()  # showing it in the IDE/GUI
In [76]:
from sklearn.preprocessing import PolynomialFeatures  # builds polynomial features from the given feature
quadratic_featurizer = PolynomialFeatures(degree=2)  # creates polynomial features up to the given degree
# degree=2 adds the X^2 term (degree=3 would add X^3, and so on)
X_train_quadratic = quadratic_featurizer.fit_transform(X_train)  # fit_transform first fits the featurizer and then transforms the data
X_test_quadratic = quadratic_featurizer.transform(X_test)  # this only transforms the X_test data into polynomial features (row by row)
# print(X_train_quadratic)
# print(X_test_quadratic)
In [77]:
# LINEAR MODEL --> FITTING THE MODEL ON X_TRAIN_QUADRATIC, which includes the x^2 feature
regressor_quadratic = LinearRegression()  # creating the linear regression model
regressor_quadratic.fit(X_train_quadratic, Y_train)  # fitting the linear regression on the polynomial features of X and the Y training set
xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0], 1))  # transforms the plotting grid into polynomial features
# print(xx_quadratic)  # check the output: the xx grid reshaped and transformed (100 samples)
In [78]:
plt.plot(xx, regressor_quadratic.predict(xx_quadratic), c='r', linestyle='--', label="Quadratic regression (degree 2)")  # linestyle sets the drawing style
# The c argument sets the color of the line
plt.plot(xx, yy, label="Simple linear regression")
plt.title("PIZZA PRICE REGRESSED ON BASIS OF DIAMETER")
plt.xlabel("DIAMETER IN INCHES")
plt.ylabel("PRICE IN DOLLARS")
plt.axis([0, 25, 0, 25])
plt.grid(True)
plt.scatter(X_train, Y_train)
plt.plot(X_train, Y_train, label="Training data line")
plt.legend()  # showing the legend
plt.show()  # showing the plotted data at the end
In [79]:
print(X_train)  # 5 x 1 training matrix
print(X_train_quadratic)  # 5 x 3 matrix: [1, x, x^2] per row
print(X_test)  # 4 x 1 test matrix
print(X_test_quadratic)  # quadratic features of X_test
print("Simple linear regression R-squared : ", regressor.score(X_test, Y_test))  # scoring the simple linear model
print("Quadratic regression R-squared :", regressor_quadratic.score(X_test_quadratic, Y_test))  # scoring the quadratic model
In [92]:
## PLOTTING WITH A CUBIC POLYNOMIAL FEATURE
# Cubic_feature = PolynomialFeatures(degree=3)  # for a 3rd-degree polynomial
# xxx_train = Cubic_feature.fit_transform(X_train)  # transforming the X training data into cubic features
# xxx_test = Cubic_feature.transform(X_test)
# regressor_cubic = LinearRegression()  # regression model for the cubic features
# regressor_cubic.fit(xxx_train, Y_train)  # fitting on the cubic-transformed training data
# print(regressor_cubic.predict(xxx_train))
# xx_cubic = Cubic_feature.transform(xx.reshape(xx.shape[0], 1))  # transforming the plotting grid as well
# plt.plot(xx, regressor_cubic.predict(xx_cubic), label="cubic polynomial (degree 3)")
# plt.show()
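A runnable version of the commented-out cubic experiment above, as a sketch under the assumption that X_train, Y_train, X_test, Y_test, and the xx grid from the earlier cells are still in scope:

cubic_featurizer = PolynomialFeatures(degree=3)  # third-degree polynomial features: [1, x, x^2, x^3]
X_train_cubic = cubic_featurizer.fit_transform(X_train)
X_test_cubic = cubic_featurizer.transform(X_test)
regressor_cubic = LinearRegression()
regressor_cubic.fit(X_train_cubic, Y_train)  # fit on the cubic-transformed training data
xx_cubic = cubic_featurizer.transform(xx.reshape(xx.shape[0], 1))  # transform the plotting grid too
plt.plot(xx, regressor_cubic.predict(xx_cubic), label="cubic polynomial (degree 3)")
plt.legend()
plt.show()
print("Cubic regression R-squared :", regressor_cubic.score(X_test_cubic, Y_test))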
In [ ]: