In [16]:
import numpy
import pandas
# def normalize_features(array):
#     """
#     Normalize the features in our data set.
#     """
#     array_normalized = (array - array.mean()) / array.std()
#     mu = array.mean()
#     sigma = array.std()
#     return array_normalized, mu, sigma
def compute_cost(features, values, theta):
    """
    Compute the cost of a list of parameters, theta, given a list of features (input
    data points) and values (output data points).
    """
    m = len(values)
    sum_of_square_errors = numpy.square(numpy.dot(features, theta) - values).sum()
    cost = sum_of_square_errors / (2 * m)
    return cost
def gradient_descent(features, values, theta, alpha, num_iterations):
    """
    Perform gradient descent given a data set with an arbitrary number of features.
    """
    # Perform num_iterations updates to the elements of theta. Every time you
    # compute the cost for a given list of thetas, append it to cost_history.
    m = len(values)
    cost_history = []

    ###########################
    ### YOUR CODE GOES HERE ###
    ###########################
    for i in range(num_iterations):
        # Batch gradient descent step: theta := theta - (alpha / m) * X^T (X*theta - y)
        predicted_values = numpy.dot(features, theta)
        theta = theta - alpha / m * numpy.dot((predicted_values - values), features)
        # Record the cost after each update.
        cost = compute_cost(features, values, theta)
        cost_history.append(cost)

    return theta, pandas.Series(cost_history)  # leave this line for the grader
# if __name__ == '__main__':
#     # Read data into a pandas dataframe.
#     data = pandas.read_csv("C:/Vindico/Projects/Data/Course/Python/Udacity/Introduction to Data Science/Lesson 3/baseball_data.csv")
#
#     # Isolate features / values. Select 'HR' with single brackets so values is a
#     # 1-D Series, matching the dot products in the functions above.
#     features = data[['height', 'weight']]
#     values = data['HR']
#     m = len(values)
#
#     # Normalize features.
#     features, mu, sigma = normalize_features(features)
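As a quick sanity check of gradient_descent, here is a minimal sketch on synthetic data (the feature matrix, values, alpha, and iteration count are all invented for illustration; a column of ones stands in for the intercept):

# Synthetic check: values follow 1 + 2*x exactly, so theta should approach [1, 2].
features = numpy.array([[1.0, 2.0],
                        [1.0, 3.0],
                        [1.0, 4.0],
                        [1.0, 5.0],
                        [1.0, 6.0]])
values = numpy.array([5.0, 7.0, 9.0, 11.0, 13.0])
theta = numpy.zeros(2)

theta, cost_history = gradient_descent(features, values, theta,
                                       alpha=0.05, num_iterations=1000)
print(theta)                  # close to [1., 2.]
print(cost_history.iloc[-1])  # near zero, and cost_history is non-increasing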
In [7]:
import numpy
import pandas
def compute_cost(features, values, theta):
    """
    Compute the cost of a list of parameters, theta, given a list of features (input
    data points) and values (output data points).
    """
    m = len(values)
    sum_of_square_errors = numpy.square(numpy.dot(features, theta) - values).sum()
    cost = sum_of_square_errors / (2 * m)
    return cost
def gradient_descent(features, values, theta, alpha, num_iterations):
    """
    Perform gradient descent given a data set with an arbitrary number of features.
    """
    # Perform num_iterations updates to the elements of theta. Every time you
    # compute the cost for a given list of thetas, append it to cost_history.
    cost_history = []

    ###########################
    ### YOUR CODE GOES HERE ###
    ###########################
    m = float(len(values))  # float so that 1/m is a true division under Python 2
    for i in range(num_iterations):
        # Record the cost before each update (the first solution records it after).
        cost = compute_cost(features, values, theta)
        cost_history.append(cost)
        # Same step with the sign folded in: theta := theta + (alpha / m) * X^T (y - X*theta)
        theta = theta + alpha * (1 / m) * numpy.dot((values - numpy.dot(features, theta)), features)

    return theta, pandas.Series(cost_history)  # leave this line for the grader
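The only substantive difference between the two cells is bookkeeping: the first records the cost after each update, the second before. The update rules themselves are algebraically identical, differing only in where the minus sign sits. A quick check with made-up arrays (all shapes and numbers here are illustrative assumptions):

import numpy

# Made-up arrays purely to check the algebra; shapes match the functions above.
X = numpy.array([[1.0, 2.0], [1.0, 3.0], [1.0, 4.0]])
y = numpy.array([1.0, 2.0, 3.0])
theta = numpy.array([0.5, 0.5])
alpha, m = 0.1, float(len(y))

step_a = theta - alpha / m * numpy.dot(numpy.dot(X, theta) - y, X)
step_b = theta + alpha * (1 / m) * numpy.dot(y - numpy.dot(X, theta), X)
print(numpy.allclose(step_a, step_b))  # True: the two updates are identical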