In [3]:
# NOTE(review): %pylab star-imports numpy and matplotlib names into the
# interactive namespace (see the "Populating the interactive namespace"
# message it prints). The bare `plot(...)` calls further down rely on it.
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [30]:
# Import libraries
# The __future__ import selects Python 3 behaviour for imports, division
# and print when this cell is executed by a Python 2 kernel.
from __future__ import absolute_import, division, print_function

# Ignore warnings
# NOTE(review): the 'ignore' action is installed with no category or module
# restriction, so it hides every warning, including deprecation notices.
import warnings
warnings.filterwarnings('ignore')

# Append the relative 'tools/' directory to the module search path.
import sys
sys.path.append('tools/')

import numpy as np
import pandas as pd
from scipy.stats import norm

# Graphing Libraries
# matplotlib.pyplot is aliased as `pyplt` throughout this notebook.
import matplotlib.pyplot as pyplt
import seaborn as sns
sns.set_style("white")  


from IPython.display import display
# Default figure size as (width, height) for every figure created below.
pyplt.rcParams['figure.figsize'] = (4, 3)

In [31]:
# Overlay the density functions of two zero-mean Gaussians: the standard
# normal (variance 1) and a narrower one (variance 0.5).
#
# scipy's norm.pdf(x, loc, scale) expects the *standard deviation* as
# `scale`, so each variance is converted with np.sqrt before the call.
# For variance 1 the curve is unchanged, since sqrt(1) == 1.

X = np.linspace(-6, 6, 500)
mean = 0

# One loop instead of two copy-pasted stanzas. After the loop `variance`
# and `density` hold the values for the last curve (variance 0.5).
for variance in (1, 0.5):
    density = norm.pdf(X, loc=mean, scale=np.sqrt(variance))
    # Call pyplt.plot explicitly; the bare `plot` name only exists after
    # `%pylab inline` has populated the interactive namespace.
    pyplt.plot(X, density, label="variance = {}".format(variance))

pyplt.title("Gaussian pdf")
pyplt.legend();


Sigmoid Function


In [40]:
# Plot the logistic sigmoid e^x / (e^x + 1) on the interval [-10, 10].
e = np.exp(1)
x = np.linspace(start=-10, stop=10, num=1000)

# Evaluate e^x once and reuse it for the numerator and the denominator.
exp_x = np.power(e, x)
y = np.divide(exp_x, exp_x + 1)

pyplt.plot(x, y)
pyplt.title('sigmoid function');


Fit a logistic regression model to the SAT scores.


In [89]:
from scipy.special import logit
from sklearn.linear_model import LogisticRegression

# Load the SAT table from the relative data/ directory and preview the
# first five rows (the default for head()).
sat_csv_path = 'data/sat2014.csv'
data = pd.read_csv(sat_csv_path)
data.head(n=5)


Out[89]:
State Participation Rate Critical Reading Math Writing Combined
0 North Dakota 2.3 612 620 584 1816
1 Illinois 4.6 599 616 587 1802
2 Iowa 3.1 605 611 578 1794
3 South Dakota 2.9 604 609 579 1792
4 Minnesota 5.9 598 610 578 1786

In [127]:
# Split the SAT table by column position. In the data.head() preview,
# columns 2-4 are Critical Reading, Math and Writing (the features) and
# column 5 is Combined (the target).
#
# `.iloc` replaces `DataFrame.ix`, which was deprecated and then removed in
# pandas 1.0. With string column labels, `.ix` resolved these integer
# selectors by position, so `.iloc` selects exactly the same columns.
X, y = data.iloc[:, 2:5], data.iloc[:, 5]

logistic = LogisticRegression()
model = logistic.fit(X, y)

# Keep the fitted coefficients and intercept, from which the decision
# boundary can be solved.
a = model.coef_
b = model.intercept_

In [ ]:


In [ ]: