In [30]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
%matplotlib inline

Linear Regression on fake generated data


In [31]:
# Generate noisy samples of the line y = slope * x + intercept, fit a linear
# model with the normal equation, and plot the samples together with the fit.
slope = 1
intercept = 2
numRecords = 1000
x = np.linspace(-10, 10, numRecords)

mu = 0        # mean of random noise
sigma = 3     # standard deviation of the random noise
# add random normal noise with mean (mu) and standard deviation (sigma);
# the length comes from numRecords so x and noise always stay the same size
noise = sigma * np.random.randn(numRecords) + mu
# generate linear data
y = slope * x + intercept + noise

plt.scatter(x, y, label = 'fake data')    # plot the fake generated data
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Linear generated data')

# Turn x and y into column vectors of shape (numRecords, 1).
x = x.reshape(-1, 1)
y = y.reshape(-1, 1)
temp = np.ones_like(y)            # column of ones that models the bias term
# Keep the design matrix [1, x] under its own name: x stays the plain input
# column so it can still be used as the horizontal axis of the plot below.
design = np.hstack((temp, x))

# Normal equation: (X^T X) w = X^T y ; X: design matrix, y: target.
# Solving the linear system avoids forming the explicit inverse of X^T X.
coeffecients = np.linalg.solve(np.dot(design.T, design), np.dot(design.T, y))
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('Coefficients: {v}\n'.format(v = coeffecients[1:]))
print('Bias: {v} \n'.format(v = coeffecients[0]))

# Fitted model evaluated at every sample: bias * 1 + slope * x, shape (numRecords, 1).
yPredicted = np.dot(design, coeffecients)
# Plot a single fitted line against the original x column (not the design matrix,
# whose column of ones would add a degenerate second line and legend entry).
plt.plot(x, yPredicted, color = 'r', lw = 4, label = 'predected relation')

plt.legend()
plt.show()


Coefficients: [[ 1.00548893]]

Bias: [ 2.15164624] 

Linear Regression using Sklearn package


In [9]:
# Fit scikit-learn's LinearRegression on the diabetes dataset and report the
# learned coefficients and intercept.
lr = linear_model.LinearRegression()  # initialize linear regression model

diabetes = datasets.load_diabetes()   # load diabetes dataset
data = diabetes.data                  # extract feature matrix
target = diabetes.target              # extract target vector

# Train the model on the whole dataset (no train/test split is made here)
lr.fit(data, target)

# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('Coefficients: {v}\n'.format(v = lr.coef_))
print('Bias: {v} \n'.format(v = lr.intercept_))


Coefficients: [ -10.01219782 -239.81908937  519.83978679  324.39042769 -792.18416163
  476.74583782  101.04457032  177.06417623  751.27932109   67.62538639]

Bias: 152.133484163 

Linear Regression using plain python on multidimensional data


In [6]:
# Fit a linear model to the diabetes dataset with the normal equation, using
# NumPy only for the fit, and report the coefficients and bias.
diabetes = datasets.load_diabetes()   # load diabetes dataset
data = diabetes.data                  # extract feature matrix
target = diabetes.target              # extract target vector
# column of ones (one row per sample) that models the bias term
temp = np.ones_like(target).reshape(-1, 1)
data = np.hstack((temp, data))        # prepend the ones column -> design matrix [1, features]
# Normal equation: (X^T X) w = X^T y ; X: data (design matrix), y: target.
# Solving the linear system avoids forming the explicit inverse of X^T X.
coeffecients = np.linalg.solve(np.dot(data.T, data), np.dot(data.T, target))
# coeffecients[0] belongs to the ones column (bias); the rest are feature weights.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('Coefficients: {v}\n'.format(v = coeffecients[1:]))
print('Bias: {v} \n'.format(v = coeffecients[0]))


Coefficients: [ -10.01219782 -239.81908937  519.83978679  324.39042769 -792.18416163
  476.74583782  101.04457032  177.06417623  751.27932109   67.62538639]

Bias: 152.133484163