Goal: Cover the basics of preparing data for modelling, building models, and validating them.
Data Preparation:
Data Modelling:
Linear Regression Example
Support Vector Machines *
Validation:
http://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_regression.html
categorical variables to indexes
In [38]:
import numpy as np
In [39]:
# Ten random integer weights in [0, 100): scale uniform samples from [0, 1)
# by 100 and truncate toward zero with a single vectorized cast instead of a
# per-element Python-level int() call.
weights = (np.random.rand(10) * 100).astype(int)
In [26]:
# Five integer heights drawn uniformly, with replacement, from 0..14.
# Passing the int 15 samples as if from np.arange(15).
heights = np.random.choice(15, size=5)
In [27]:
weights
Out[27]:
In [28]:
heights
Out[28]:
In [ ]:
# NOTE(review): `g` and `h` are not defined in any visible cell, so this line
# raises NameError as written -- define them first or confirm where they come from.
# NOTE(review): if this is meant to be the free-fall relation v**2 == 2*g*h,
# the right-hand side is the squared speed, not the speed -- confirm the intent.
v = 2 * g * h
In [67]:
from sklearn.datasets import load_boston
In [68]:
# Load the Boston housing dataset; the next cell reads its `feature_names`.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so the import above and this call need an older scikit-learn release.
data = load_boston()
In [50]:
data.feature_names
Out[50]:
In [79]:
from sklearn import datasets
from sklearn.model_selection import cross_val_predict
from sklearn import linear_model
import matplotlib.pyplot as plt
# Plot cross-validated predictions of a linear regression against the
# measured targets of the Boston housing data.
# NOTE(review): datasets.load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2; this cell needs an older scikit-learn release to run.
boston = datasets.load_boston()
y = boston.target
lr = linear_model.LinearRegression()

# `predicted` has the same size as `y`; each entry is the prediction made for
# that sample by cross validation (cv=10).
predicted = cross_val_predict(lr, boston.data, y, cv=10)

fig, ax = plt.subplots(figsize=(15, 5))
ax.scatter(y, predicted)

# Dashed black diagonal: points on it have predicted == measured.
_target_span = [y.min(), y.max()]
ax.plot(_target_span, _target_span, 'k--', lw=4)

ax.set(xlabel='Measured', ylabel='Predicted')
plt.show()
In [ ]:
In [ ]:
In [ ]: