In [2]:
import os
import sys
import datetime as dt

import numpy as np
import pandas as pd
from scipy import stats, constants
from scipy.special import comb, perm, factorial, expit
import statsmodels.api as sm

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [3]:
# Root of the course-notes folder. Defaults to the original hard-coded location;
# set the COURSERA_ML_NOTES_DIR environment variable to point somewhere else
# (e.g. on another machine) without editing the notebook.
fp_list_master = ['C:', 'Users', 'szahn', 'Dropbox', 'Statistics & Machine Learning', 'coursera_ml_notes']
fp = os.environ.get('COURSERA_ML_NOTES_DIR', os.sep.join(fp_list_master))

# Every figure in this notebook is saved below <notes root>/LaTeX Notes/Figures.
fp_fig = os.sep.join([fp, 'LaTeX Notes', 'Figures'])

# Sanity check: both values should print True before any figure is saved.
print(os.path.isdir(fp), os.path.isdir(fp_fig))


True True

In [48]:
# Toy 1-D classification data: x is plotted as "Tumor Size", y as the 0/1 label.
x_list = [1, 2, 3, 4, 7, 8, 9, 10]
y_list = [0, 0, 0, 0, 1, 1, 1, 1]

with sns.axes_style('white'):

    fig, ax = plt.subplots(figsize=(10, 6))

    # Labelled examples as large red diamonds.
    ax.plot(x_list, y_list, 'rD', markersize=16)

    ax.set_xlabel("Tumor Size", fontsize=24)
    ax.set_ylabel('Malignant?', fontsize=24)

    # Only the two class values are ticked on y; the x axis carries no ticks.
    plt.yticks([0, 1], fontsize=24)
    plt.xticks([])
    ax.set_xlim(-0.1, 11)
    ax.set_ylim(-0.10, 1.10)

    # Open frame: hide the top and right borders.
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    fig.savefig(os.sep.join([fp_fig, 'logreg_eg1_maltumor.pdf']))



In [51]:
# Same toy data as the plain scatter: x plotted as "Tumor Size", y as the 0/1 label.
x_list = [1, 2, 3, 4, 7, 8, 9, 10]
y_list = [0, 0, 0, 0, 1, 1, 1, 1]

# Hand-picked straight line drawn over the data, evaluated at x = 0..10.
x_list_reg = list(range(11))


def regline(x):
    """Return the value of the illustrative regression line at x."""
    return (1/6) * (x - 1) - 0.25


y_list_reg = list(map(regline, x_list_reg))

with sns.axes_style('white'):

    fig, ax = plt.subplots(figsize=(10, 6))

    # Data points first, then the thick blue line.
    ax.plot(x_list, y_list, 'rD', markersize=16)
    ax.plot(x_list_reg, y_list_reg, 'b-', linewidth=4)

    ax.set_xlabel("Tumor Size", fontsize=24)
    ax.set_ylabel('Malignant?', fontsize=24)

    # Only the two class values are ticked on y; the x axis carries no ticks.
    plt.yticks([0, 1], fontsize=24)
    plt.xticks([])
    ax.set_xlim(-0.1, 11)
    ax.set_ylim(-0.10, 1.10)

    # Open frame: hide the top and right borders.
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    fig.savefig(os.sep.join([fp_fig, 'logreg_eg1_maltumor_linreg1.pdf']))



In [87]:
# Toy data: x plotted as "Tumor Size", y as the 0/1 label.
x_list = [1, 2, 3, 4, 7, 8, 9, 10]
y_list = [0, 0, 0, 0, 1, 1, 1, 1]

# Hand-picked straight line drawn over the data, evaluated at x = 0..10.
x_list_reg = list(range(11))
regline = lambda x: (1/6) * (x - 1) - 0.25
y_list_reg = [regline(x) for x in x_list_reg]

# Decision threshold: the x at which the line reaches 0.5.
#   (1/6) * (x - 1) - 0.25 = 0.5   =>   x = 1 + 6 * 0.75 = 5.5
# The boundary was previously drawn at x = 5, where the line is only ~0.417,
# so the vertical line and shading did not match the plotted regression line.
threshold_x = 1 + 6 * (0.5 + 0.25)

textstr1 = 'Not malignant'
textstr2 = 'Malignant'
props = dict(boxstyle='round', facecolor='dodgerblue', alpha=0.5)

with sns.axes_style('white'):

    fig, ax = plt.subplots(figsize=(10, 6))
    plt.plot(x_list, y_list, 'rD', markersize=16)
    plt.plot(x_list_reg, y_list_reg, 'b-', linewidth=4)
    plt.xlabel("Tumor Size", fontsize=24)
    plt.ylabel('Malignant?', fontsize=24)
    plt.yticks([0, 1], fontsize=24)
    plt.xticks([])
    plt.ylim(-0.10, 1.10)
    plt.xlim(0, 10.5)

    # Vertical decision boundary with the two predicted regions shaded either side.
    plt.axvline(x=threshold_x, color='purple', linewidth=6)
    plt.axvspan(0, threshold_x, color='wheat')
    plt.axvspan(threshold_x, 11, color='lavenderblush')

    # Region labels (data coordinates, anchored at the top of each text box).
    ax.text(1, 0.95, textstr1, fontsize=20, verticalalignment='top', bbox=props)
    ax.text(8, 0.15, textstr2, fontsize=20, verticalalignment='top', bbox=props)

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    plt.savefig(fp_fig + os.sep + 'logreg_eg1_maltumor_linreg1_threshold.pdf')



In [104]:
# Toy data with one extra positive example far to the right (x = 15).
x_list = [1, 2, 3, 4, 7, 8, 9, 10, 15]
y_list = [0, 0, 0, 0, 1, 1, 1, 1, 1]

# Flatter hand-picked line, evaluated at x = 0..15.
x_list_reg = list(range(16))


def regline(x):
    """Return the value of the illustrative regression line at x."""
    return (1/10) * x - 0.25


y_list_reg = list(map(regline, x_list_reg))


# Text and box styling for the two region labels.
textstr1 = 'Not malignant'
textstr2 = 'Malignant'
props = dict(boxstyle='round', facecolor='dodgerblue', alpha=0.5)

with sns.axes_style('white'):

    fig, ax = plt.subplots(figsize=(10, 6))

    ax.plot(x_list, y_list, 'rD', markersize=16)
    ax.plot(x_list_reg, y_list_reg, 'b-', linewidth=4)

    ax.set_xlabel("Tumor Size", fontsize=24)
    ax.set_ylabel('Malignant?', fontsize=24)
    plt.yticks([0, 1], fontsize=24)
    plt.xticks([])
    ax.set_ylim(-0.10, 1.10)
    ax.set_xlim(0, 16)

    # Vertical boundary at x = 7.5 with a shaded region on each side.
    ax.axvline(x=7.5, color='purple', linewidth=6)
    ax.axvspan(0, 7.5, color='wheat')
    ax.axvspan(7.5, 16, color='lavenderblush')

    # Region labels (data coordinates, anchored at the top of each text box).
    ax.text(1, 0.95, textstr1, fontsize=20, verticalalignment='top', bbox=props)
    ax.text(13, 0.15, textstr2, fontsize=20, verticalalignment='top', bbox=props)

    # Open frame: hide the top and right borders.
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    fig.savefig(os.sep.join([fp_fig, 'logreg_eg1_maltumor_linreg1_newpoint.pdf']))



In [140]:
# Sigmoid (logistic) curve: expit evaluated on 150 evenly spaced points in [-15, 15].
x_list = np.linspace(-15, 15, 150)
y_list = expit(x_list)


with sns.axes_style('white'):

    fig, ax = plt.subplots(figsize=(10, 6))
    plt.plot(x_list, y_list, 'b')
    plt.xlim(-15, 15)
    plt.ylim(-0.05, 1.05)

    # Move the left/bottom spines onto the data origin so the axes cross at (0, 0).
    for side in ('left', 'bottom'):
        spine = ax.spines[side]
        spine.set_position('zero')
        # Spine.set_smart_bounds was deprecated in Matplotlib 3.2 and removed in
        # 3.4; only call it where it still exists so the cell runs on both.
        if hasattr(spine, 'set_smart_bounds'):
            spine.set_smart_bounds(True)

    # Hide the remaining two borders.
    for side in ('right', 'top'):
        ax.spines[side].set_color('none')

    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    plt.yticks([0, 0.5, 1], fontsize=18)
    plt.xticks(fontsize=18)

    plt.savefig(fp_fig + os.sep + 'logreg_eg2_sigmoid_func_plot.pdf')



In [34]:
# First group of points (drawn as blue circles).
x_list1 = [0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1.5, 1.5, 2]
y_list1 = [0.5, 1, 1.5, 2, 0.5, 1, 1.5, 0.5, 1, 0.5]

# Second group of points (drawn as green triangles).
x_list2 = [2.5, 2.5, 2.5, 2.5, 2, 1.5, 1, 2, 1.5, 2]
y_list2 = [1, 1.5, 2, 2.5, 2.5, 2.5, 2.5, 2, 2, 1.5]

with sns.axes_style('white'):
    fig, ax = plt.subplots(figsize=(6, 6))

    ax.plot(x_list1, y_list1, 'bo', markersize=20)
    ax.plot(x_list2, y_list2, 'g^', markersize=20)

    # Integer ticks on a square 0..3 window.
    plt.yticks([0, 1, 2, 3], fontsize=18)
    plt.xticks([0, 1, 2, 3], fontsize=18)
    ax.set_ylim(0, 3)
    ax.set_xlim(0, 3)

    # Open frame: hide the top and right borders.
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    fig.savefig(os.sep.join([fp_fig, 'logreg_eg3_decision_bndy_noline.pdf']))



In [51]:
# First group of points (drawn as blue circles).
x_list1 = [0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1.5, 1.5, 2]
y_list1 = [0.5, 1, 1.5, 2, 0.5, 1, 1.5, 0.5, 1, 0.5]

# Second group of points (drawn as green triangles).
x_list2 = [2.5, 2.5, 2.5, 2.5, 2, 1.5, 1, 2, 1.5, 2]
y_list2 = [1, 1.5, 2, 2.5, 2.5, 2.5, 2.5, 2, 2, 1.5]

# Boundary line: x runs 0 -> 3 while y is the same grid reversed (3 -> 0).
x_list3 = np.linspace(0, 3, 50)
y_list3 = x_list3[::-1]

# Text and box styling for the two region labels.
textstr1 = 'y = 0'
textstr2 = 'y = 1'
props = dict(boxstyle='round', facecolor='purple', alpha=0.5)

with sns.axes_style('white'):
    fig, ax = plt.subplots(figsize=(6, 6))

    ax.plot(x_list1, y_list1, 'bo', markersize=20)
    ax.plot(x_list2, y_list2, 'g^', markersize=20)
    ax.plot(x_list3, y_list3, '-', color='purple', linewidth=4)

    # Integer ticks up to 3; the window extends to 3.5 on both axes.
    plt.yticks([0, 1, 2, 3], fontsize=18)
    plt.xticks([0, 1, 2, 3], fontsize=18)
    ax.set_ylim(0, 3.5)
    ax.set_xlim(0, 3.5)

    # One label on each side of the line.
    ax.text(0.1, 0.3, textstr1, fontsize=20, verticalalignment='top', bbox=props)
    ax.text(2.3, 3, textstr2, fontsize=20, verticalalignment='top', bbox=props)

    # Open frame: hide the top and right borders.
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    fig.savefig(os.sep.join([fp_fig, 'logreg_eg3_decision_bndy_withline.pdf']))



In [81]:
# Local, seeded generator: the scatter (and therefore the saved PDF) is identical
# on every run, and NumPy's global random state is left untouched.
rng = np.random.RandomState(0)

# Inner cluster: 100 points uniform on the square [-0.8, 0.8] x [-0.8, 0.8].
x_list1 = rng.uniform(low=-0.8, high=0.8, size=100)
y_list1 = rng.uniform(low=-0.8, high=0.8, size=100)

# Outer ring, assembled from rectangular patches around the inner cluster.
# The first three x/y partials deliberately have different lengths; they line up
# only after concatenation:
#   x_partial_1 (25) <-> y_partial_1 (12) + y_partial_2 (13)   top and bottom strips
#   x_partial_2 (13) + x_partial_3 (12) <-> y_partial_3 (25)   right and left strips
# Partials 4-7 are the four corner patches (12 points each).
x_partial_1 = rng.uniform(-1, 1, 25)
y_partial_1 = rng.uniform(2, 3, 12)
y_partial_2 = rng.uniform(-3, -2, 13)
x_partial_2 = rng.uniform(2, 3, 13)
x_partial_3 = rng.uniform(-3, -2, 12)
y_partial_3 = rng.uniform(-1, 1, 25)
x_partial_4 = rng.uniform(1, 2, 12)
y_partial_4 = rng.uniform(1, 2, 12)
x_partial_5 = rng.uniform(-2, -1, 12)
y_partial_5 = rng.uniform(1, 2, 12)
x_partial_6 = rng.uniform(1, 2, 12)
y_partial_6 = rng.uniform(-2, -1, 12)
x_partial_7 = rng.uniform(-2, -1, 12)
y_partial_7 = rng.uniform(-2, -1, 12)

x_list2 = np.concatenate([x_partial_1, x_partial_2, x_partial_3, x_partial_4, x_partial_5, x_partial_6, x_partial_7], 0)
y_list2 = np.concatenate([y_partial_1, y_partial_2, y_partial_3, y_partial_4, y_partial_5, y_partial_6, y_partial_7], 0)

# Label text, box styling and boundary circle are defined but not drawn in this figure.
textstr1 = 'y = 0'
textstr2 = 'y = 1'
props = dict(boxstyle='round', facecolor='purple', alpha=0.5)
circle = plt.Circle((0, 0), 1.3, color='purple', fill=False, linewidth=4)

with sns.axes_style('white'):
    fig, ax = plt.subplots(figsize=(6, 6))
    plt.plot(x_list1, y_list1, 'b.')
    plt.plot(x_list2, y_list2, 'rd')

    plt.yticks(fontsize=18)
    plt.xticks(fontsize=18)
    plt.ylim(-3.2, 3.2)
    plt.xlim(-3.2, 3.2)

    # Axes cross at the origin; the top and right borders are hidden.
    ax.spines['left'].set_position('zero')
    ax.spines['right'].set_color('none')
    ax.spines['bottom'].set_position('zero')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    plt.savefig(fp_fig + os.sep + 'logreg_eg3_decision_bndy_nonlinear_nocirc.pdf')



In [82]:
# Local, seeded generator: the scatter (and therefore the saved PDF) is identical
# on every run, and NumPy's global random state is left untouched.
rng = np.random.RandomState(0)

# Inner cluster: 100 points uniform on the square [-0.8, 0.8] x [-0.8, 0.8].
x_list1 = rng.uniform(low=-0.8, high=0.8, size=100)
y_list1 = rng.uniform(low=-0.8, high=0.8, size=100)

# Outer ring, assembled from rectangular patches around the inner cluster.
# The first three x/y partials deliberately have different lengths; they line up
# only after concatenation:
#   x_partial_1 (25) <-> y_partial_1 (12) + y_partial_2 (13)   top and bottom strips
#   x_partial_2 (13) + x_partial_3 (12) <-> y_partial_3 (25)   right and left strips
# Partials 4-7 are the four corner patches (12 points each).
x_partial_1 = rng.uniform(-1, 1, 25)
y_partial_1 = rng.uniform(2, 3, 12)
y_partial_2 = rng.uniform(-3, -2, 13)
x_partial_2 = rng.uniform(2, 3, 13)
x_partial_3 = rng.uniform(-3, -2, 12)
y_partial_3 = rng.uniform(-1, 1, 25)
x_partial_4 = rng.uniform(1, 2, 12)
y_partial_4 = rng.uniform(1, 2, 12)
x_partial_5 = rng.uniform(-2, -1, 12)
y_partial_5 = rng.uniform(1, 2, 12)
x_partial_6 = rng.uniform(1, 2, 12)
y_partial_6 = rng.uniform(-2, -1, 12)
x_partial_7 = rng.uniform(-2, -1, 12)
y_partial_7 = rng.uniform(-2, -1, 12)

x_list2 = np.concatenate([x_partial_1, x_partial_2, x_partial_3, x_partial_4, x_partial_5, x_partial_6, x_partial_7], 0)
y_list2 = np.concatenate([y_partial_1, y_partial_2, y_partial_3, y_partial_4, y_partial_5, y_partial_6, y_partial_7], 0)

# Label text and box styling are defined but not drawn in this figure.
textstr1 = 'y = 0'
textstr2 = 'y = 1'
props = dict(boxstyle='round', facecolor='purple', alpha=0.5)

# Circular boundary of radius 1.3 centred on the origin, drawn as an outline only.
circle = plt.Circle((0, 0), 1.3, color='purple', fill=False, linewidth=4)

with sns.axes_style('white'):
    fig, ax = plt.subplots(figsize=(6, 6))
    plt.plot(x_list1, y_list1, 'b.')
    plt.plot(x_list2, y_list2, 'rd')

    plt.yticks(fontsize=18)
    plt.xticks(fontsize=18)
    plt.ylim(-3.2, 3.2)
    plt.xlim(-3.2, 3.2)

    ax.add_artist(circle)

    # Axes cross at the origin; the top and right borders are hidden.
    ax.spines['left'].set_position('zero')
    ax.spines['right'].set_color('none')
    ax.spines['bottom'].set_position('zero')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    plt.savefig(fp_fig + os.sep + 'logreg_eg3_decision_bndy_nonlinear.pdf')



In [104]:
def nonconvex(x):
    """Return x**2 + 10*sin(2*x), the curve plotted as the non-convex example."""
    return x**2 + 10*np.sin(2*x)


# Evaluation grid: -10 up to (but excluding) 10 in steps of 0.1.
x = np.arange(-10, 10, 0.1)

plt.figure(figsize=(6, 4))
plt.plot(x, nonconvex(x))

# Title and axis annotation, with a sparse set of ticks.
plt.title('Non-Convex Curve', fontsize=20)
plt.xlabel(r'$\theta$', fontsize=20)
plt.xticks([-10, 0, 10], fontsize=14)
plt.yticks([0, 50, 100], fontsize=14)

plt.savefig(os.sep.join([fp_fig, 'logreg_eg4_sample_nonconvex_curve.pdf']))



In [151]:
def cost_function(hypothesis_function, y):
    """Per-example logistic-regression cost for a binary label.

    Parameters
    ----------
    hypothesis_function : float or array-like
        Hypothesis value(s) h; passed straight to ``np.log``, so arrays are
        handled element-wise.
    y : int
        Scalar class label, either 0 or 1.

    Returns
    -------
    float or numpy.ndarray
        ``-log(h)`` when ``y == 1`` and ``-log(1 - h)`` when ``y == 0``.

    Raises
    ------
    ValueError
        If ``y`` is neither 0 nor 1 (previously this silently returned None).
    """
    if y == 1:
        return -1 * np.log(hypothesis_function)
    elif y == 0:
        return -1 * np.log(1 - hypothesis_function)
    raise ValueError("y must be 0 or 1, got {!r}".format(y))

In [152]:
# Evaluation grid: 0 up to (but excluding) 1 in steps of 0.05.
x = np.arange(start=0, stop=1, step=0.05)

In [ ]:


In [163]:
# Cost curve for the y = 1 case, plotted against the hypothesis value.
with sns.axes_style('white'):
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(x, cost_function(x, 1))

    # Axes cross at the origin; the top and right borders are hidden.
    for side in ('left', 'bottom'):
        ax.spines[side].set_position('zero')
    for side in ('right', 'top'):
        ax.spines[side].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    plt.yticks(fontsize=14)
    plt.xticks(fontsize=14)

    # Pin only the lower y limit; the upper limit stays automatic.
    ax.set_ylim(bottom=-0.5)
    ax.set_xlabel(r'$h_\theta (x)$', fontsize=20)

    fig.savefig(os.sep.join([fp_fig, 'logreg_eg5_cost_func_y1.pdf']))



In [165]:
# Cost curve for the y = 0 case, plotted against the hypothesis value.
with sns.axes_style('white'):
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(x, cost_function(x, 0))

    # Axes cross at the origin; the top and right borders are hidden.
    for side in ('left', 'bottom'):
        ax.spines[side].set_position('zero')
    for side in ('right', 'top'):
        ax.spines[side].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    plt.yticks(fontsize=14)
    plt.xticks(fontsize=14)

    # Pin only the lower y limit; the upper limit stays automatic.
    ax.set_ylim(bottom=-0.5)
    ax.set_xlabel(r'$h_\theta (x)$', fontsize=20)

    fig.savefig(os.sep.join([fp_fig, 'logreg_eg5_cost_func_y0.pdf']))



In [27]:
# Three hand-placed groups of points, one marker style per group.
x1 = [0.6, 1, 0.6, 0.9]
y1 = [0.7, 0.6, 0.9, 1.1]

x2 = [2, 1.9, 2.4, 2.2]
y2 = [2.2, 1.7, 2.1, 1.8]

x3 = [0.75, 0.5, 0.9]
y3 = [2.1, 2.5, 2.3]

with sns.axes_style('white'):
    fig, ax = plt.subplots(figsize=(6, 6))

    # Blue circles, green triangles, red squares.
    ax.plot(x1, y1, 'bo', markersize=20)
    ax.plot(x2, y2, 'g^', markersize=20)
    ax.plot(x3, y3, 'rs', markersize=20)

    ax.set_xlabel("$x_1$", fontsize=28)
    ax.set_ylabel('$x_2$', fontsize=28)

    # Integer ticks on a square 0..3 window.
    plt.yticks([0, 1, 2, 3], fontsize=18)
    plt.xticks([0, 1, 2, 3], fontsize=18)
    ax.set_ylim(0, 3)
    ax.set_xlim(0, 3)

    # Open frame: hide the top and right borders.
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    fig.savefig(os.sep.join([fp_fig, 'logreg_eg6_multiclass_eg_data.pdf']))



In [31]:
# Two hand-placed groups of points, one marker style per group.
x1 = [0.7, 0.9, 0.8, 1.1]
y1 = [0.4, 0.5, 0.7, 0.9]

x2 = [2.2, 2.1, 2.4, 2.6]
y2 = [2.2, 1.7, 2.1, 1.8]

with sns.axes_style('white'):
    fig, ax = plt.subplots(figsize=(6, 6))

    # Blue circles and green triangles.
    ax.plot(x1, y1, 'bo', markersize=20)
    ax.plot(x2, y2, 'g^', markersize=20)

    ax.set_xlabel("$x_1$", fontsize=28)
    ax.set_ylabel('$x_2$', fontsize=28)

    # Integer ticks on a square 0..3 window.
    plt.yticks([0, 1, 2, 3], fontsize=18)
    plt.xticks([0, 1, 2, 3], fontsize=18)
    ax.set_ylim(0, 3)
    ax.set_xlim(0, 3)

    # Open frame: hide the top and right borders.
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    fig.savefig(os.sep.join([fp_fig, 'logreg_eg6_binary_eg_data.pdf']))



In [34]:
# Three hand-placed groups of points; the second and third share one marker style.
x1 = [0.6, 1, 0.6, 0.9]
y1 = [0.7, 0.6, 0.9, 1.1]

x2 = [2, 1.9, 2.4, 2.2]
y2 = [2.2, 1.7, 2.1, 1.8]

x3 = [0.75, 0.5, 0.9]
y3 = [2.1, 2.5, 2.3]

with sns.axes_style('white'):
    fig, ax = plt.subplots(figsize=(6, 6))

    # First group as blue circles; the other two both as black diamonds.
    ax.plot(x1, y1, 'bo', markersize=20)
    ax.plot(x2, y2, 'kd', markersize=20)
    ax.plot(x3, y3, 'kd', markersize=20)

    ax.set_xlabel("$x_1$", fontsize=28)
    ax.set_ylabel('$x_2$', fontsize=28)

    # Integer ticks on a square 0..3 window.
    plt.yticks([0, 1, 2, 3], fontsize=18)
    plt.xticks([0, 1, 2, 3], fontsize=18)
    ax.set_ylim(0, 3)
    ax.set_xlim(0, 3)

    # Open frame: hide the top and right borders.
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    fig.savefig(os.sep.join([fp_fig, 'logreg_eg6_multiclass_onevall_step1.pdf']))



In [52]:
# Hand-placed points, plotted with "Size" on x and "Price" on y.
x1 = [1, 1.5, 2, 3.2, 4.5, 6]
y1 = [1, 3.5, 5, 5.5, 6, 6.2]

# Straight line 0.9 * x + 1.5 evaluated on 50 points across the plotted x range.
x2 = np.linspace(0, 7, 50)
fx = lambda x: 0.9 * x + 1.5
y2 = [fx(x) for x in x2]

with sns.axes_style('white'):
    fig, ax = plt.subplots(figsize=(6, 6))
    plt.plot(x1, y1, 'rd', markersize=16)
    plt.plot(x2, y2, 'b-')

    # Two-line label: plain "Size" above the hypothesis formula.
    # Mathtext has no "\\" line break (the old single-math-string label fails to
    # parse), so the break is a real newline placed outside the $...$ segment.
    plt.xlabel("Size\n" + r"$\theta_0 + \theta_1 x$", fontsize=28)
    plt.ylabel('Price', fontsize=28)
    plt.ylim(0, 7)
    plt.xlim(0, 7)


    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # NOTE(review): saving is disabled, and the file name below is the one already
    # used by the one-vs-all figure -- choose a new name before re-enabling.
    #plt.savefig(fp_fig + os.sep + 'logreg_eg6_multiclass_onevall_step1.pdf')