In [1]:
%matplotlib inline
In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import time
Set up seaborn to use slightly larger fonts.
In [3]:
sns.set_context("talk")
(from https://twitter.com/jeremyjarvis/status/428848527226437632/photo/1)
$Y = h(X)$
$e_{train} = h(X_{train}) - Y_{train}$
$\hat{Y} = h(X_{new})$
In [4]:
import numpy as np
from sklearn.linear_model import LinearRegression
# Training targets: heights in inches, as a column vector of shape (5, 1)
height_inches = np.array([[51.0, 56.0, 64.0, 71.0, 69.0]]).T
# Training features: ages in years, as a column vector of shape (5, 1)
age_years = np.array([[7.8, 10.7, 13.7, 17.5, 20.1]]).T
# Initialize model
model = LinearRegression()
# Train
model.fit(age_years, height_inches)
# Predict the height at age 15. scikit-learn estimators expect a 2D
# (n_samples, n_features) array, so the single age is wrapped as a 1x1
# array instead of being passed as a bare scalar.
model.predict(np.array([[15.0]]))
Out[4]:
In [5]:
def plot_boys(age, height, test=None, pred=None):
    """Scatter-plot height against age, optionally overlaying a fitted curve.

    Parameters
    ----------
    age : array-like
        Values for the x-axis (labelled "Age (years)").
    height : array-like
        Values for the y-axis (labelled "Height (inches)").
    test, pred : array-like, optional
        x-values and the matching predictions. The line is drawn only when
        both are supplied.
    """
    # Plot the data that was passed in, not the notebook-level globals.
    plt.plot(age, height, marker='o', ls='none')
    plt.xlabel("Age (years)")
    plt.ylabel("Height (inches)")
    plt.title("Boys")
    # A curve needs both coordinates; skip it if either one is missing.
    if test is not None and pred is not None:
        plt.plot(test, pred, '-')
In [6]:
# Show the raw training points only (no fitted curve).
plot_boys(age=age_years, height=height_inches)
In [7]:
# 50 evenly spaced ages from 7 to 21 (np.linspace's default count),
# shaped as a column so that each row is one sample.
test = np.linspace(7.0, 21.0).reshape(-1, 1)
pred = model.predict(test)
In [8]:
# Training points with the current predictions drawn as a line.
plot_boys(age_years, height_inches, test=test, pred=pred)
In [9]:
# Widen the grid to ages 0-50, well beyond the training ages (7.8-20.1),
# to see how the fitted model extrapolates.
test = np.linspace(0.0, 50.0).reshape(-1, 1)
pred = model.predict(test)
In [10]:
# Training points with the predictions over the widened age range.
plot_boys(age_years, height_inches, test=test, pred=pred)
In [11]:
from sklearn.preprocessing import PolynomialFeatures
# Expand the single age feature into polynomial terms up to degree 4.
pf = PolynomialFeatures(degree=4)
x_poly = pf.fit_transform(age_years)
# NOTE: this refits the existing `model` object in place, so from here on
# `model` expects polynomial-expanded inputs rather than raw ages.
model.fit(x_poly, height_inches)
# Evaluate on 50 evenly spaced ages from 7 to 21, one sample per row.
test = np.linspace(7.0, 21.0).reshape(-1, 1)
test_poly = pf.transform(test)
pred = model.predict(test_poly)
In [12]:
# Training points with the polynomial model's predictions as a line.
plot_boys(age_years, height_inches, test=test, pred=pred)
In [13]:
# Evaluate the polynomial model on ages 0-50, far outside the training ages.
test = np.linspace(0.0, 50.0).reshape(-1, 1)
# The grid must go through the same feature expansion used for fitting.
test_poly = pf.transform(test)
pred = model.predict(test_poly)
In [14]:
# Training points with the polynomial predictions over the widened range.
plot_boys(age_years, height_inches, test=test, pred=pred)
- `insert` with appropriately formatted JSON to configure and train the model.
- `predict` with similarly formatted JSON to make predictions.

Luckily, there are good examples here: https://github.com/google/google-api-python-client
In [15]:
import csv
# Column name -> dtype mapping for loading the CSV: ids and classes as int64,
# text columns (and "Survived") as generic objects, age as float64.
dtypes = dict(
    PassengerId=np.int64,
    Survived=object,
    Pclass=np.int64,
    Name=object,
    Sex=object,
    Age=np.float64,
)
In [16]:
# Load the training set using the explicit column dtypes in `dtypes`.
train_df = pd.read_csv("train.csv", dtype=dtypes)
# Preview the first rows instead of rendering the whole frame.
train_df.head()
Out[16]:
In [17]:
import googleprediction
# Rebinds `model`: until this cell it referred to the scikit-learn regressor.
# NOTE(review): argument meanings are inferred from their values, not from the
# GooglePredictor signature -- confirm against the googleprediction module.
model = googleprediction.GooglePredictor(
"myproject",  # presumably the cloud project id
"mybucket/train_cleaned.csv",  # presumably bucket/path of the training CSV
"hastalapasta",  # presumably the model id
"client_secrets.json")  # presumably the path to a client-secrets file
In [18]:
def survived(pred):
    """Print "YES" if the first predicted label is u'1', otherwise "NO".

    `pred` is indexed with [0], so it must be a non-empty sequence of labels
    (presumably the result of GooglePredictor.predict -- confirm).
    """
    label = pred[0]
    # print() with one argument behaves the same on Python 2 and Python 3;
    # the bare print statement is a syntax error on Python 3.
    print("YES" if label == u'1' else "NO")
In [19]:
# One passenger's feature row, then a single-row prediction request.
# NOTE(review): the public Titanic CSV lists siblings/spouses before
# parents/children -- confirm the two count labels below match the column
# order of the training file.
passenger = [
    '1',  # Fare class
    'Spencer Mrs William Augustus Marie Eugenie',  # Name
    'female',  # Gender
    20.2,  # Age
    1,  # Number of parents or children aboard
    0,  # Number of siblings or spouse aboard
    146.5208,  # Fare price
]
pred = model.predict([passenger])
survived(pred)
In [20]:
# A made-up male passenger in first class paying a fare of 20.
passenger = [
    '1',  # Fare class
    'Frank Lampard',  # Name
    'male',  # Gender
    36.0,  # Age
    0,  # Number of parents or children aboard
    0,  # Number of siblings or spouse aboard
    20.0,  # Fare price
]
pred = model.predict([passenger])
survived(pred)
In [21]:
# Same passenger as the fare-20.0 example, with the fare raised to 500.
passenger = [
    '1',  # Fare class
    'Frank Lampard',  # Name
    'male',  # Gender
    36.0,  # Age
    0,  # Number of parents or children aboard
    0,  # Number of siblings or spouse aboard
    500.0,  # Fare price
]
pred = model.predict([passenger])
survived(pred)
Prediction accuracies for sklearn.RandomForestClassifier
(100 trees) and the Google Prediction API
on the Taylor Swift audio clip data set.
In [21]: