In [172]:
#libraries used
import pandas as pd
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt
%matplotlib inline
In [72]:
# Load the fixed-width brain/body table. Double brackets keep each selection
# a one-column DataFrame rather than a Series.
df = pd.read_fwf('linear_regression_demo/brain_body.txt')
x_values, y_values = df[['Brain']], df[['Body']]

# Fit a linear regression of Body on Brain (fit() returns the estimator itself).
body_reg = linear_model.LinearRegression().fit(x_values, y_values)

# Draw the observations and overlay the model's predictions for the same inputs.
plt.scatter(x_values, y_values)
plt.plot(x_values, body_reg.predict(x_values))
plt.show()
The weekly challenge is to predict life expectancy from BMI at birth.
The challenge for this video is to use scikit-learn to create a line of best fit for the included 'challenge_dataset'. Then, make a prediction for an existing data point and see how close it matches up to the actual value. Print out the error you get.
In [109]:
# The challenge file has no header row, so the two columns are named here.
df = pd.read_csv('linear_regression_demo/challenge_dataset.txt', names=['Data', 'Outcome'])
x_values, y_values = df[['Data']], df[['Outcome']]

# Fit a linear regression of Outcome on Data (fit() returns the estimator itself).
reg = linear_model.LinearRegression().fit(x_values, y_values)

# Draw the observations and overlay the model's predictions for the same inputs.
plt.scatter(x_values, y_values)
plt.plot(x_values, reg.predict(x_values))
plt.show()
So now we have a simple model trained on the dataset. Now let's make a prediction.
In [115]:
# Store the model's prediction for every row next to the observed outcome,
# then the signed error (prediction minus actual) in its own column.
predicted = reg.predict(x_values)
df['Predictions'] = predicted
df['Pred_Error'] = df['Predictions'].sub(df['Outcome'])
df.head()
Out[115]:
In [114]:
# Average absolute prediction error: take |error| for every row and average it.
# Done with vectorized pandas operations instead of accumulating in a Python loop.
df['Pred_Error'].abs().mean()
Out[114]:
In [124]:
import pandas as pd
from sklearn.linear_model import LinearRegression
# Load the BMI / life-expectancy CSV into the variable the exercise expects.
# The path is relative, so the file must sit in the notebook's working directory.
bmi_life_data = pd.read_csv('bmi_and_life_expectancy.csv')
# Report (rows, columns), then preview the first rows as the cell output.
print(bmi_life_data.shape)
bmi_life_data.head()
Out[124]:
In [136]:
# Fit life expectancy against BMI and keep the estimator in bmi_life_model.
# Double brackets keep each selection a one-column DataFrame.
x_vals, y_vals = bmi_life_data[['BMI']], bmi_life_data[['Life expectancy']]
bmi_life_model = LinearRegression().fit(x_vals, y_vals)

# Scatter of the raw data with the model's predictions drawn over it.
plt.scatter(x_vals, y_vals)
plt.plot(x_vals, bmi_life_model.predict(x_vals))
plt.show()
In [138]:
# Make a prediction using the model: life expectancy for a BMI value of 21.07931.
# predict() needs a 2-D (n_samples, n_features) input; a bare scalar raises
# ValueError. The value is therefore wrapped in a one-row frame whose column
# name matches the 'BMI' feature used when the model was fitted.
laos_bmi = pd.DataFrame({'BMI': [21.07931]})
laos_life_exp = bmi_life_model.predict(laos_bmi)
laos_life_exp
Out[138]:
In [157]:
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
# Load the data from the Boston house-prices dataset.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on older scikit-learn releases. Confirm the
# installed version or switch to another data source.
boston_data = load_boston()
# Show the first sample's feature values next to its target value.
print(boston_data.data[0], boston_data.target[0])
In [162]:
# Feature matrix and target vector taken from the loaded dataset object.
x, y = boston_data['data'], boston_data['target']

# Fit the linear regression and keep the estimator in `model`
# (fit() returns the estimator itself).
model = LinearRegression().fit(x, y)

# A single sample of 13 feature values, wrapped in an outer list so the
# input to predict() is 2-D (one row).
sample_house = [[
    2.29690000e-01, 0.00000000e+00, 1.05900000e+01, 0.00000000e+00,
    4.89000000e-01, 6.32600000e+00, 5.25000000e+01, 4.35490000e+00,
    4.00000000e+00, 2.77000000e+02, 1.86000000e+01, 3.94870000e+02,
    1.09700000e+01,
]]

# Predicted target value for the sample house.
prediction = model.predict(sample_house)
print(prediction)
In [181]:
# Step 1 - collect our data.
# header=None: the first line of the CSV is read as data, so the columns get
# the integer labels 0 and 1 that later cells index by.
df = pd.read_csv('linear_regression_live/data.csv', header=None)
df.head()
Out[181]:
In [192]:
# Load the same CSV as a plain NumPy array; this is the form the
# gradient-descent functions index with points[i, 0] / points[i, 1].
points = np.genfromtxt('linear_regression_live/data.csv', delimiter=',')
# Preview the first five rows.
points[:5]
Out[192]:
In [235]:
# Let's look at the data: column 0 on the x-axis against column 1 on the y-axis.
plt.scatter(df[0], df[1])
plt.show()
Step 2 - Define our hyperparameters for the line equation y = mx + b (slope-intercept form). The learning rate controls how fast our model converges.
In [185]:
# Hyperparameters for fitting y = m*x + b by gradient descent.
learning_rate = 1e-4          # step size applied to every gradient update
initial_b, initial_m = 0, 0   # starting intercept and slope
num_iterations = 1000         # number of gradient-descent steps to run
Step 3: Train the model
In [230]:
def compute_error_for_line_given_points(b, m, points):
    """Return the mean squared error of the line ``y = m*x + b`` over ``points``.

    Parameters
    ----------
    b : float
        Intercept of the line.
    m : float
        Slope of the line.
    points : array-like with at least two columns
        One row per observation; column 0 is read as x and column 1 as y.
        Anything ``np.asarray`` can convert (ndarray, list of pairs, ...) works.

    Returns
    -------
    numpy.float64
        Average of the squared vertical distances between each point and the line.

    Raises
    ------
    ValueError
        If ``points`` contains no rows (the mean is undefined).
    """
    data = np.asarray(points, dtype=float)
    if data.shape[0] == 0:
        raise ValueError("points must contain at least one (x, y) row")

    # Vertical distance of every point from the line, computed in one shot.
    residuals = data[:, 1] - (m * data[:, 0] + b)
    return np.mean(residuals ** 2)
def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    """Run ``num_iterations`` gradient-descent steps and return the fitted line.

    Parameters
    ----------
    points : array-like
        Observations, passed through to ``step_gradient`` as a NumPy array.
    starting_b, starting_m : float
        Initial intercept and slope.
    learning_rate : float
        Step size handed to ``step_gradient`` on every iteration.
    num_iterations : int
        Number of update steps; 0 returns the starting values unchanged.

    Returns
    -------
    list
        ``[b, m]`` after the final step.
    """
    # Convert once up front; the original re-copied the data on every iteration.
    data = np.asarray(points)
    b, m = starting_b, starting_m
    for _ in range(num_iterations):
        # Each step replaces b and m with the values one gradient update closer.
        b, m = step_gradient(b, m, data, learning_rate)
    return [b, m]
def step_gradient(b_current, m_current, points, learningRate):
    """Perform one gradient-descent update of the line ``y = m*x + b``.

    The gradients are the partial derivatives of the mean squared error with
    respect to ``b`` and ``m``, evaluated at the current parameters.

    Parameters
    ----------
    b_current, m_current : float
        Current intercept and slope.
    points : array-like with at least two columns
        One row per observation; column 0 is read as x and column 1 as y.
    learningRate : float
        Step size applied to both gradients.

    Returns
    -------
    list
        ``[new_b, new_m]``. With no points the parameters are returned unchanged.
    """
    data = np.asarray(points, dtype=float)
    if data.shape[0] == 0:
        # No observations means no gradient signal; leave the line as it is.
        return [b_current, m_current]

    x, y = data[:, 0], data[:, 1]
    residuals = y - (m_current * x + b_current)

    # d(MSE)/db = -(2/N) * sum(residual);  d(MSE)/dm = -(2/N) * sum(x * residual)
    b_gradient = -2.0 * residuals.mean()
    m_gradient = -2.0 * (x * residuals).mean()

    # Move against the gradient, scaled by the learning rate.
    new_b = b_current - (learningRate * b_gradient)
    new_m = m_current - (learningRate * m_gradient)
    return [new_b, new_m]
In [233]:
# Error of the untrained line, reported before any update is applied.
initial_error = compute_error_for_line_given_points(initial_b, initial_m, points)
print('starting gradient descent at b = {0}, m = {1}, error = {2}'.format(initial_b, initial_m, initial_error))

# Fit the intercept and slope.
b, m = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)

# Error of the fitted line, for comparison with the starting error.
final_error = compute_error_for_line_given_points(b, m, points)
print('ending point after {0} iterations at b = {1}, m = {2}, error = {3}'.format(num_iterations, b, m, final_error))
In [232]:
# Display the intercept and slope produced by the gradient-descent run.
b,m
Out[232]:
In [245]:
# Fit scikit-learn's LinearRegression on the same data. Columns are selected
# by their integer labels, and double brackets keep each a one-column DataFrame.
x_vals, y_vals = df[[0]], df[[1]]
live_model = LinearRegression().fit(x_vals, y_vals)

# Scatter of the raw data with the model's predictions drawn over it.
plt.scatter(x_vals, y_vals)
plt.plot(x_vals, live_model.predict(x_vals))
plt.show()
In [246]:
run gradient_descent.py
In [ ]: