In [4]:
import os
import sys
import datetime as dt
import numpy as np
import pandas as pd
from scipy import stats, constants
from scipy.special import comb, perm, factorial
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [29]:
# Locations of the notes directory and its 'Figures' sub-directory, assembled
# from path components with the platform separator.
# NOTE(review): this is an absolute, machine-specific path -- confirm it
# exists before running the cells that save figures.
fp_list = [
    'C:',
    'Users',
    'szahn',
    'Dropbox',
    'Statistics & Machine Learning',
    'coursera_ml_notes',
]
fp = os.sep.join(fp_list)
fp_fig = os.sep.join([fp, 'Figures'])

# Report whether each directory is present on this machine.
print(*(os.path.isdir(path) for path in (fp, fp_fig)))
In [5]:
# Sample points used by the plotting cells: x runs over 0..3, y holds the
# value paired with each x.
x = [*range(4)]
y = [4, 7, 7, 8]
In [35]:
# Scatter the sample points on whitegrid-styled axes, draw black reference
# lines through the origin, and save the figure as a PDF in fp_fig.
with sns.axes_style(style='whitegrid'):
    fig, ax = plt.subplots()
    ax.plot(x, y, 'bo')

    # Fixed limits leave a margin around the points.
    ax.set_xlim([-1, 3.5])
    ax.set_ylim([-1, 9])

    # Reference lines at y = 0 and x = 0.
    ax.axhline(y=0, color='black')
    ax.axvline(x=0, color='black')

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    fig.savefig(os.sep.join([fp_fig, 'linreg_eg1_plotpoints_noline.pdf']))
In [37]:
def hypot(x):
    """Return the value of the hypothesis line h(x) = 2x + 2 at ``x``."""
    return 2*x + 2


# Evaluate the hypothesis at 0, 0.5, ..., 3.0 so it can be drawn as a line.
# The comprehension variable is deliberately not called `x`, so it cannot be
# confused with the module-level list of sample x values plotted below.
x_val = np.arange(0, 3.5, 0.5)
y_val = [hypot(x_i) for x_i in x_val]

# Sample points (blue dots) with the hypothesis line (red) on top, black
# reference lines through the origin, saved as a PDF in fp_fig.
with sns.axes_style(style='whitegrid'):
    # An explicit figure/axes pair keeps this cell independent of whatever
    # figure pyplot currently considers "current".
    fig, ax = plt.subplots()
    ax.plot(x, y, 'bo')
    ax.plot(x_val, y_val, color='red')
    ax.set_xlim([-1, 3.5])
    ax.set_ylim([-1, 9])
    ax.axhline(y=0, color='black')
    ax.axvline(x=0, color='black')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    fig.savefig(fp_fig + os.sep + 'linreg_eg1_plotpoints_line1.pdf')
In [38]:
from mpl_toolkits.mplot3d import axes3d, Axes3D
from matplotlib import cm
In [68]:
# Square figure holding a single 3-D axes for the cost scatter plot.
fig = plt.figure(figsize=(12, 12))
# Passing keyword arguments to Figure.gca() was deprecated in Matplotlib 3.4
# and later removed, so the 3-D projection is requested via add_subplot().
ax = fig.add_subplot(111, projection='3d')

# Parameter grids: 20 evenly spaced values for each of theta_0 and theta_1.
theta_0_vals = np.linspace(-5, 5, 20)
theta_1_vals = np.linspace(-1, 5, 20)
def compute_cost(param_0, param_1, x_list, y_list):
    """Return the halved mean squared error of the line ``param_0 + param_1 * x``.

    The value is ``(1 / (2 * m)) * sum((param_0 + param_1 * x_i - y_i) ** 2)``
    taken over the paired samples, where ``m`` is the number of samples.

    Parameters
    ----------
    param_0 : number
        Intercept of the line.
    param_1 : number
        Slope of the line.
    x_list, y_list : sized iterables of numbers
        Paired sample inputs and targets; both must have the same length.

    Returns
    -------
    float
        The cost of the line on the given samples.

    Raises
    ------
    ValueError
        If the inputs differ in length (``zip`` would otherwise silently drop
        the unmatched samples while ``m`` still counted them) or are empty
        (the mean would divide by zero).
    """
    m = len(x_list)
    if m != len(y_list):
        raise ValueError(
            'x_list and y_list must have the same length, got %d and %d'
            % (m, len(y_list))
        )
    if m == 0:
        raise ValueError('compute_cost needs at least one sample')

    squared_errors = (
        (param_0 + param_1 * x_i - y_i) ** 2
        for x_i, y_i in zip(x_list, y_list)
    )
    return (1 / (2 * m)) * sum(squared_errors)
# Flattened grid coordinates and the cost at each grid point.
# Note the naming: `theta1` collects the theta_0 values and `theta2` the
# theta_1 values, matching the axis labels set below.
theta1, theta2, cost = [], [], []


def hypot(x):
    """Return the value of the hypothesis line h(x) = 2x + 2 (not used in this cell)."""
    return 2*x + 2


# Sample points the cost is measured against.
x = list(range(4))
y = [4, 7, 7, 8]

for t0 in theta_0_vals:
    for t1 in theta_1_vals:
        theta1.append(t0)
        theta2.append(t1)
        # Score each candidate line against the data (x, y) defined above.
        # The previous version passed x_val / y_val, which are points on the
        # hypothesis line left over from an earlier cell, so the plot did not
        # show the cost for the data.
        cost.append(compute_cost(t0, t1, x, y))

scat = ax.scatter(theta1, theta2, cost)
ax.set_xlabel(r'$\theta_0$', fontsize=24)
ax.set_ylabel(r'$\theta_1$', fontsize=24)
ax.set_title(r'Cost Function by $\theta_0$ and $\theta_1$', fontsize=24)
fig.savefig(fp_fig + os.sep + 'linreg_eg2_cost_func_over_thetas.pdf')
In [69]:
from matplotlib import patches
In [116]:
# Six unfilled, black-outlined ellipses sharing the centre (3, 2000), drawn on
# whitegrid-styled axes and saved as a PDF in fp_fig.
with sns.axes_style('whitegrid'):
    fig, ax = plt.subplots()

    centre = (3, 2000)
    outline = dict(edgecolor='black', linewidth=2, fill=False)
    # (width, height) of each ellipse, innermost first.
    sizes = [
        (0.3, 400),
        (0.6, 1000),
        (1, 1800),
        (1.5, 2800),
        (2, 3900),
        (2.5, 5500),
    ]
    e1, e2, e3, e4, e5, e6 = [
        patches.Ellipse(centre, width, height, **outline)
        for width, height in sizes
    ]
    for ellipse in (e1, e2, e3, e4, e5, e6):
        ax.add_patch(ellipse)

    ax.set_xlim(0, 10)
    ax.set_ylim(0, 5000)
    ax.set_xlabel(r'$\theta_1$', fontsize=18)
    ax.set_ylabel(r'$\theta_2$', fontsize=18)
    fig.savefig(os.sep.join([fp_fig, 'linreg_eg3_why_need_feature_scaling.pdf']))
In [ ]: