In [ ]:
import pandas as pd
In [ ]:
# a major part of the _art_ of applied machine learning
# is "feature engineering", which means mapping the
# raw data we loaded in the previous notebook into a
# representation that is *good*. I'm not going to
# get into that now. Instead, here is what it might look like:
df = pd.read_csv('../3-data/features.csv')  # hypothetical filename; point this at the file saved by the previous notebook
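In [ ]:
# a quick peek at the engineered representation
# (the exact columns depend on the file loaded above)
df.head()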
In [ ]:
# remember numpy, the fundamental package for
# scientific computing with Python?
import numpy as np
In [ ]:
# we will use it to create an array of feature vectors
# and an array of the corresponding labels
X = np.array(df.filter(like='feature'))  # hypothetical column prefix; keep only the feature columns
y = np.array(df.cod)  # hypothetical column name for the underlying cause of death
In [ ]:
# how much data are we dealing with here?
X.shape
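In [ ]:
# it can also help to see how the labels are distributed;
# np.unique with return_counts gives one count per class
np.unique(y, return_counts=True)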
In [ ]:
# Here is how to train a charmingly self-deprecating
# ML method, Naive Bayes, to predict the underlying
# cause of death (CoD) with sklearn
import sklearn.naive_bayes
In [ ]:
# one reasonable choice for binary features; GaussianNB or
# MultinomialNB would fit other feature types
clf = sklearn.naive_bayes.BernoulliNB()
In [ ]:
# fit the model to our feature vectors and labels
clf.fit(X, y)
In [ ]:
# predict for one example (sklearn expects a 2-d array,
# so we slice rather than index)
clf.predict(X[:1])
In [ ]:
# compare for that one example
y[0]
In [ ]:
# So let's see how well it is making predictions overall:
y_pred = clf.predict(X)
In [ ]:
# compare for all: the fraction of examples where the
# prediction matches the true label
np.mean(y == y_pred)
In [ ]:
# now the same workflow with a second method, k-nearest neighbors
import sklearn.neighbors
In [ ]:
clf = sklearn.neighbors.KNeighborsClassifier()  # defaults to n_neighbors=5
In [ ]:
# fit
clf.fit(X, y)
In [ ]:
# predict (for one)
clf.predict(X[:1])
In [ ]:
# compare (for one)
y[0]
In [ ]:
# predict for all
y_pred = clf.predict(X)
In [ ]:
# compare for all
np.mean(y == y_pred)
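In [ ]:
# n_neighbors defaults to 5, and it is worth seeing how the
# choice of k changes things; a quick sketch reusing the same
# X and y (with k=1 the training accuracy is trivially high,
# another hint that these in-sample numbers flatter us)
for k in [1, 5, 15]:
    knn = sklearn.neighbors.KNeighborsClassifier(n_neighbors=k).fit(X, y)
    print(k, np.mean(y == knn.predict(X)))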
In [ ]:
# a third method: gradient boosting, an ensemble of decision trees
import sklearn.ensemble
In [ ]:
clf = sklearn.ensemble.GradientBoostingClassifier()
In [ ]:
# fit (this one is slower, so let's time it)
%time clf.fit(X, y)
In [ ]:
# predict for one (the second example this time,
# to match the comparison below)
clf.predict(X[1:2])
In [ ]:
# compare for one
y[1]
In [ ]:
# predict for all
y_pred = clf.predict(X)
In [ ]:
# compare for all (keep in mind this is accuracy on the
# training data, so it is an optimistic estimate)
np.mean(y == y_pred)
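In [ ]:
# a fairer comparison would hold out a test set; a minimal
# sketch with sklearn's train_test_split (random_state is an
# arbitrary choice, just to make the split reproducible)
import sklearn.model_selection
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=12345)
clf = sklearn.ensemble.GradientBoostingClassifier()
clf.fit(X_train, y_train)
np.mean(y_test == clf.predict(X_test))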