In [4]:
import os
import sys
import datetime as dt

import numpy as np
import pandas as pd
from scipy import stats, constants
from scipy.special import comb, perm, factorial
import statsmodels.api as sm

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [29]:
# Root folder of the course notes. The default is the original hard-coded
# location; set the COURSERA_ML_NOTES_DIR environment variable to point the
# notebook at a different folder without editing this cell.
fp_list = ['C:', 'Users', 'szahn', 'Dropbox', 'Statistics & Machine Learning', 'coursera_ml_notes']
fp = os.environ.get('COURSERA_ML_NOTES_DIR') or os.sep.join(fp_list)
# The plt.savefig calls in this notebook write into this subfolder.
fp_fig = fp + os.sep + 'Figures'
# Sanity check: prints whether each of the two folders exists.
print(os.path.isdir(fp), os.path.isdir(fp_fig))


True True

In [5]:
# Toy data set used by the plots in this notebook: y[i] is the value paired
# with input x[i] = i.
y = [4, 7, 7, 8]
x = [i for i in range(4)]

In [35]:
# Scatter plot of the data points only (no line drawn), saved as a PDF.
with sns.axes_style(style='whitegrid'):
    # Fetch the axes inside the context so they are created with the style.
    ax = plt.gca()
    ax.plot(x, y, 'bo')
    ax.set_xlim([-1, 3.5])
    ax.set_ylim([-1, 9])

    # Draw black reference lines through y = 0 and x = 0.
    ax.axhline(y=0, color='black')
    ax.axvline(x=0, color='black')

    ax.set_xlabel('x')
    ax.set_ylabel('y')

    plt.savefig(os.sep.join([fp_fig, 'linreg_eg1_plotpoints_noline.pdf']))



In [37]:
def hypot(x):
    """Hypothesis line h(x) = 2*x + 2 drawn on top of the data points."""
    return 2*x + 2


# Sample the hypothesis at 0, 0.5, ..., 3.0 (np.arange excludes the stop value).
x_val = np.arange(0, 3.5, 0.5)
# The loop variable is named x_i so it cannot be confused with the data list x.
y_val = [hypot(x_i) for x_i in x_val]

with sns.axes_style(style='whitegrid'):
    plt.plot(x, y, 'bo')                  # data points
    plt.plot(x_val, y_val, color='red')   # hypothesis line
    plt.xlim([-1, 3.5])
    plt.ylim([-1, 9])

    # Draw black reference lines through y = 0 and x = 0.
    plt.axhline(y=0, color='black')
    plt.axvline(x=0, color='black')

    plt.xlabel('x')
    plt.ylabel('y')

    plt.savefig(fp_fig + os.sep + 'linreg_eg1_plotpoints_line1.pdf')



In [38]:
from mpl_toolkits.mplot3d import axes3d, Axes3D
from matplotlib import cm

In [68]:
# Figure and 3-D axes for the cost scatter plot. Figure.gca() no longer accepts
# keyword arguments (deprecated in Matplotlib 3.4, removed in 3.6), so the 3-D
# axes are created explicitly with add_subplot.
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111, projection='3d')

# 20 evenly spaced candidate values for each of the two parameters.
theta_0_vals = np.linspace(-5, 5, 20)
theta_1_vals = np.linspace(-1, 5, 20)

def compute_cost(param_0, param_1, x_list, y_list):
    """Return the halved mean squared error of the line param_0 + param_1 * x.

    The result is (1 / (2 * m)) * sum((param_0 + param_1 * x_i - y_i) ** 2),
    summed over the pairs produced by zip(x_list, y_list), with
    m = len(x_list).
    """
    n_samples = len(x_list)
    squared_errors = [
        ((param_0 + param_1 * x_i) - y_i) ** 2
        for x_i, y_i in zip(x_list, y_list)
    ]
    return (1 / (2 * n_samples)) * sum(squared_errors)
    

# Flattened coordinates for the scatter plot. Note the naming offset: theta1
# collects the theta_0 values and theta2 collects the theta_1 values.
theta1, theta2, cost = [], [], []

hypot = lambda x:2*x + 2
# Data set the cost is measured against.
x = list(range(4))
y = [4, 7, 7, 8]

for t0 in theta_0_vals:
    for t1 in theta_1_vals:
        theta1.append(t0)
        theta2.append(t1)
        # Evaluate the cost on the (x, y) data defined in this cell. The
        # previous code passed x_val / y_val, which are left over from an
        # earlier cell and hold points sampled from the hypothesis line.
        cost.append(compute_cost(t0, t1, x, y))

scat = ax.scatter(theta1, theta2, cost)
plt.xlabel(r'$\theta_0$', fontsize=24)
plt.ylabel(r'$\theta_1$', fontsize=24)
plt.title(r'Cost Function by $\theta_0$ and $\theta_1$', fontsize=24)

plt.savefig(fp_fig + os.sep + 'linreg_eg2_cost_func_over_thetas.pdf')



In [69]:
from matplotlib import patches

In [116]:
# Concentric, strongly elongated ellipses saved as the
# 'why_need_feature_scaling' figure.
with sns.axes_style('whitegrid'):
    fig, ax = plt.subplots()

    # (width, height) of each ellipse, innermost first; all share one centre.
    contour_sizes = [
        (0.3, 400),
        (0.6, 1000),
        (1, 1800),
        (1.5, 2800),
        (2, 3900),
        (2.5, 5500),
    ]
    for width, height in contour_sizes:
        ax.add_patch(
            patches.Ellipse((3, 2000), width, height,
                            edgecolor='black', linewidth=2, fill=False)
        )

    plt.xlim(0, 10)
    plt.ylim(0, 5000)
    plt.xlabel(r'$\theta_1$', fontsize=18)
    plt.ylabel(r'$\theta_2$', fontsize=18)

    plt.savefig(fp_fig + os.sep + 'linreg_eg3_why_need_feature_scaling.pdf')



In [ ]: