In [1]:
import numpy as np
import sys
if "../" not in sys.path:
  sys.path.append("../")

from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler

from supervised.logistic_regression import LogisticRegression, Softmax

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

import seaborn as sns
sns.set_context('notebook')
sns.set_style('white')

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2


/Users/mac/anaconda/envs/py35/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [2]:
# Binary task: load the breast-cancer dataset and split it into train/test
# parts. The split is stratified on the target and uses a fixed random_state
# so it is the same on every run.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=2046,
)

# Standardize the features. The scaler is fitted on the training split only;
# the test split is transformed with those same fitted statistics.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [3]:
# First binary model: only `lr` and `max_iters` are set; no `C` argument is
# passed, so whatever default the project class defines is used.
model_0 = LogisticRegression(lr=0.1, max_iters=2000)
model_0.fit(X_train, y_train)

# Accuracy = fraction of predictions that equal the true labels.
model_0_train_acc = np.mean(model_0.predict(X_train) == y_train)
model_0_test_acc = np.mean(model_0.predict(X_test) == y_test)

print("Training set score: {:.3f}".format(model_0_train_acc))
print("Test set score: {:.3f}".format(model_0_test_acc))


Training set score: 0.988
Test set score: 0.965

In [4]:
# Second binary model: same `lr` and `max_iters` as above, plus an explicit
# C=0.1 (presumably a regularization setting -- confirm its meaning in
# supervised.logistic_regression).
model_1 = LogisticRegression(lr=0.1, max_iters=2000, C=0.1)
model_1.fit(X_train, y_train)

# Accuracy = fraction of predictions that equal the true labels.
model_1_train_acc = np.mean(model_1.predict(X_train) == y_train)
model_1_test_acc = np.mean(model_1.predict(X_test) == y_test)

print("Training set score: {:.3f}".format(model_1_train_acc))
print("Test set score: {:.3f}".format(model_1_test_acc))


Training set score: 0.979
Test set score: 0.979

In [5]:
# Multi-class logistic regression
# Load the iris dataset and hold out 30% of it as a test split (fixed seed).
iris = load_iris()
X, y = iris.data, iris.target

# NOTE: this rebinds X_train / X_test / y_train / y_test, which earlier cells
# also use for the breast-cancer data. Re-run the earlier split cell before
# re-running any cell that expects the breast-cancer arrays.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=2046
)

# `sc` is the StandardScaler created in an earlier cell. Fitting it again here
# re-estimates its statistics from the iris training split, and the test split
# is then transformed with those statistics.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [6]:
# Multi-class model: only the learning rate is set; every other option is left
# at the project class's defaults.
softmax = Softmax(lr=0.05)
softmax.fit(X_train, y_train)

# Accuracy = fraction of predictions that equal the true labels.
softmax_train_acc = np.mean(softmax.predict(X_train) == y_train)
softmax_test_acc = np.mean(softmax.predict(X_test) == y_test)

print("Training set score: {:.3f}".format(softmax_train_acc))
print("Test set score: {:.3f}".format(softmax_test_acc))


Training set score: 0.971
Test set score: 0.933