In [ ]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
%matplotlib inline
# Global seaborn appearance for every figure drawn after this cell.
# 'poster' is the largest of seaborn's built-in scaling contexts (big fonts and lines).
sns.set_context('poster')
# 'darkgrid' selects the grey-background-with-gridlines axes style.
sns.set_style('darkgrid')
Anticipated questions: something about the difference between statistics and machine learning, since the whole ISL (Introduction to Statistical Learning) chapter for this week is very familiar material on linear regression, while the DM (Data Mining) chapter is a wild ride through a range of methods, from naive Bayes to logistic regression to recursive partitioning/decision trees.
I have a set of solutions that I can circulate, but I think I will see if anyone in class is so happy with their solution that they want to share it. It should be easy to get it in front of everyone with the Sage cloud.
In [17]:
# Load the assets table from a CSV in the current working directory;
# index_col=0 makes the file's first column the row index.
# NOTE(review): the file name suggests DHS household-asset data for 2010-2011 — confirm provenance.
df = pd.read_csv('RWA_DHS6_2010_2011_HH_ASSETS.CSV', index_col=0)
# Preview only the first rows rather than rendering the whole frame.
df.head()
Out[17]:
In [18]:
# Load the companion codebook CSV; index_col=0 makes its first column the row index.
# NOTE(review): presumably maps the column codes of the assets table to descriptions — verify.
cb = pd.read_csv('RWA_DHS6_2010_2011_HH_ASSETS_codebook.CSV', index_col=0)
# Display the codebook as the cell output (the whole frame, not just a preview).
cb
Out[18]:
In [ ]:
# Bernoulli naive Bayes estimator, created with default parameters and not yet fit.
# The name `clf` is also assigned in other cells of this notebook, so whichever
# of those cells ran last determines what `clf` refers to.
import sklearn.naive_bayes
clf = sklearn.naive_bayes.BernoulliNB()
In [ ]:
# Ordinary least-squares linear regression estimator, created with default
# parameters and not yet fit.
# NOTE(review): this is a regressor bound to the name `clf`, and the notes above
# mention logistic regression; if a classifier is intended here,
# sklearn.linear_model.LogisticRegression may be what was meant — confirm.
# The name `clf` is also assigned in other cells of this notebook, so whichever
# of those cells ran last determines what `clf` refers to.
import sklearn.linear_model
clf = sklearn.linear_model.LinearRegression()
In [ ]:
# Decision-tree classifier, created with default parameters and not yet fit.
# The name `clf` is also assigned in other cells of this notebook, so whichever
# of those cells ran last determines what `clf` refers to.
import sklearn.tree
clf = sklearn.tree.DecisionTreeClassifier()
In [16]:
# Load the local ipynb_style helper module and invoke its presentation() hook.
import ipynb_style

# `reload` is a builtin only on Python 2; on Python 3 it lives in importlib,
# and calling the bare name raises NameError. Import it when available and
# otherwise keep using the Python 2 builtin.
try:
    from importlib import reload
except ImportError:
    pass  # Python 2: importlib has no reload, the builtin is used instead

# Re-execute the module so edits to it are picked up without restarting the kernel.
reload(ipynb_style)
# The value returned by presentation() is this cell's output.
ipynb_style.presentation()
Out[16]: