In [16]:
import numpy
import pandas
# def normalize_features(array):
#     """
#     Normalize the features in our data set.
#     """
#     array_normalized = (array - array.mean()) / array.std()
#     mu = array.mean()
#     sigma = array.std()
#     return array_normalized, mu, sigma
def compute_cost(features, values, theta):
    """
    Compute the cost of a list of parameters, theta, given a list of features (input
    data points) and values (output data points).
    """
    m = len(values)
    sum_of_square_errors = numpy.square(numpy.dot(features, theta) - values).sum()
    cost = sum_of_square_errors / (2 * m)
    return cost
def gradient_descent(features, values, theta, alpha, num_iterations):
    """
    Perform gradient descent given a data set with an arbitrary number of features.
    """
    # Perform num_iterations updates to the elements of theta. Every time you
    # compute the cost for a given list of thetas, append it to cost_history.
    m = len(values)
    cost_history = []

    ###########################
    ### YOUR CODE GOES HERE ###
    ###########################
    for i in range(num_iterations):
        # Batch gradient descent step: theta := theta - (alpha / m) * X^T (X*theta - y)
        predicted_values = numpy.dot(features, theta)
        theta = theta - alpha / m * numpy.dot((predicted_values - values), features)
        # Record the cost after each update.
        cost = compute_cost(features, values, theta)
        cost_history.append(cost)

    return theta, pandas.Series(cost_history)  # leave this line for the grader
# if __name__ == '__main__':
#     # Read data into a pandas dataframe.
#     data = pandas.read_csv("C:/Vindico/Projects/Data/Course/Python/Udacity/Introduction to Data Science/Lesson 3/baseball_data.csv")
#
#     # Isolate features / values. Select 'HR' with single brackets so values is a
#     # 1-D Series, matching the dot products in the functions above.
#     features = data[['height', 'weight']]
#     values = data['HR']
#     m = len(values)
#
#     # Normalize features.
#     features, mu, sigma = normalize_features(features)
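As a quick sanity check of gradient_descent, here is a minimal sketch on synthetic data (the feature matrix, values, alpha, and iteration count are all invented for illustration; a column of ones stands in for the intercept):

# Synthetic check: values follow 1 + 2*x exactly, so theta should approach [1, 2].
features = numpy.array([[1.0, 2.0],
                        [1.0, 3.0],
                        [1.0, 4.0],
                        [1.0, 5.0],
                        [1.0, 6.0]])
values = numpy.array([5.0, 7.0, 9.0, 11.0, 13.0])
theta = numpy.zeros(2)

theta, cost_history = gradient_descent(features, values, theta,
                                       alpha=0.05, num_iterations=1000)
print(theta)                  # close to [1., 2.]
print(cost_history.iloc[-1])  # near zero, and cost_history is non-increasing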
In [7]:
import numpy
import pandas
def compute_cost(features, values, theta):
    """
    Compute the cost of a list of parameters, theta, given a list of features (input
    data points) and values (output data points).
    """
    m = len(values)
    sum_of_square_errors = numpy.square(numpy.dot(features, theta) - values).sum()
    cost = sum_of_square_errors / (2 * m)
    return cost
def gradient_descent(features, values, theta, alpha, num_iterations):
    """
    Perform gradient descent given a data set with an arbitrary number of features.
    """
    # Perform num_iterations updates to the elements of theta. Every time you
    # compute the cost for a given list of thetas, append it to cost_history.
    cost_history = []

    ###########################
    ### YOUR CODE GOES HERE ###
    ###########################
    m = float(len(values))  # float so that 1/m is a true division under Python 2
    for i in range(num_iterations):
        # Record the cost before each update (the first solution records it after).
        cost = compute_cost(features, values, theta)
        cost_history.append(cost)
        # Same step with the sign folded in: theta := theta + (alpha / m) * X^T (y - X*theta)
        theta = theta + alpha * (1 / m) * numpy.dot((values - numpy.dot(features, theta)), features)

    return theta, pandas.Series(cost_history)  # leave this line for the grader
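The only substantive difference between the two cells is bookkeeping: the first records the cost after each update, the second before. The update rules themselves are algebraically identical, differing only in where the minus sign sits. A quick check with made-up arrays (all shapes and numbers here are illustrative assumptions):

import numpy

# Made-up arrays purely to check the algebra; shapes match the functions above.
X = numpy.array([[1.0, 2.0], [1.0, 3.0], [1.0, 4.0]])
y = numpy.array([1.0, 2.0, 3.0])
theta = numpy.array([0.5, 0.5])
alpha, m = 0.1, float(len(y))

step_a = theta - alpha / m * numpy.dot(numpy.dot(X, theta) - y, X)
step_b = theta + alpha * (1 / m) * numpy.dot(y - numpy.dot(X, theta), X)
print(numpy.allclose(step_a, step_b))  # True: the two updates are identical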