In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
In [3]:
import numpy as np
from sklearn import linear_model
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
diabetes = datasets.load_diabetes()
X = StandardScaler().fit_transform(diabetes.data)
y = StandardScaler(with_mean=True, with_std=False).fit_transform(diabetes.target.reshape(-1, 1)).ravel()
# trace the full LARS path: coefficients at every step of the algorithm
alphas, _, coefs = linear_model.lars_path(X, y, verbose=2)
# L1 norm of the coefficients at each step, rescaled to [0, 1]
xx = np.sum(np.abs(coefs.T), axis=1)
xx /= xx[-1]
plt.plot(xx, coefs.T)
ymin, ymax = plt.ylim()
plt.vlines(xx, ymin, ymax, linestyle='dashed')
plt.xlabel('|coef| / max|coef|')
plt.ylabel('Coefficients')
plt.axis('tight')
plt.show()
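As a hedged aside (not in the original notebook): the same helper can trace the lasso path instead of the plain LARS path by passing method='lasso', which makes it easy to compare the two sets of coefficient profiles.

# sketch: lasso path on the same standardized diabetes data
alphas_lasso, _, coefs_lasso = linear_model.lars_path(X, y, method='lasso')
print(coefs_lasso.shape)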
In [4]:
regr = linear_model.Lars()
regr.fit(X, y)
print("Coefficients are:", regr.coef_)
print("See also the ending points of the previous graph")
In [5]:
print("R2 score is", regr.score(X,y))
In [6]:
from sklearn.datasets import make_regression
X, y = make_regression(n_samples=10, n_features=1, n_informative=1, noise=3, random_state=1)
In [7]:
regr = linear_model.LinearRegression()
regr.fit(X, y)
test_x = 2 * np.max(X)
pred_test_x = regr.predict([[test_x]])  # predict expects a 2D array
pred_test_x
Out[7]:
In [8]:
plt.scatter(X, y)
x_bounds = np.array([1.2*np.min(X), 1.2*np.max(X)]).reshape(-1, 1)
plt.plot(x_bounds, regr.predict(x_bounds), 'r-')
plt.plot(test_x, pred_test_x, 'g*')
plt.show()
In [9]:
regr = linear_model.BayesianRidge()
regr.fit(X, y)
Out[9]:
In [10]:
from scipy.stats import norm
mean = regr.predict([[test_x]])[0]
# alpha_ is the estimated precision (inverse variance) of the noise,
# so the standard deviation is its inverse square root
stddev = np.sqrt(1.0 / regr.alpha_)
plt_x = np.linspace(mean - 3*stddev, mean + 3*stddev, 100)
plt.plot(plt_x, norm.pdf(plt_x, mean, stddev))
plt.show()
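If the installed scikit-learn is 0.20 or newer (an assumption, not stated in the original), BayesianRidge can also return the predictive standard deviation directly, which avoids deriving it from alpha_ by hand:

# sketch: ask the model itself for the predictive mean and standard deviation
mean_direct, std_direct = regr.predict([[test_x]], return_std=True)
print(mean_direct[0], std_direct[0])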
In [11]:
regr.alpha_
Out[11]:
In [12]:
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
# we create 100 separable points
X, y = make_classification(n_samples=100, n_features=2,
                           n_informative=2, n_redundant=0,
                           n_clusters_per_class=1, class_sep=2,
                           random_state=101)
# fit the model (max_iter replaces the deprecated n_iter parameter)
clf = SGDClassifier(loss="hinge", max_iter=500, random_state=101,
                    alpha=0.001)
clf.fit(X, y)
# plot the line, the points, and the nearest vectors to the plane
xx = np.linspace(np.min(X[:,0]), np.max(X[:,0]), 10)
yy = np.linspace(np.min(X[:,1]), np.max(X[:,1]), 10)
X1, X2 = np.meshgrid(xx, yy)
Z = np.empty(X1.shape)
for (i, j), val in np.ndenumerate(X1):
    x1 = val
    x2 = X2[i, j]
    # signed distance of each grid point from the separating hyperplane
    p = clf.decision_function([[x1, x2]])
    Z[i, j] = p[0]
levels = [-1.0, 0.0, 1.0]
linestyles = ['dashed', 'solid', 'dashed']
plt.contour(X1, X2, Z, levels, colors='k', linestyles=linestyles)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
plt.show()
In [13]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
X, y = make_classification(n_samples=10000, n_features=20,
                           n_informative=5, n_redundant=5,
                           n_clusters_per_class=2, class_sep=1,
                           random_state=101)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101)
clf_1 = SGDClassifier(loss="hinge", random_state=101)
clf_1.fit(X_train, y_train)
clf_2 = SGDClassifier(loss="log", random_state=101)
clf_2.fit(X_train, y_train)
print('SVM : ', accuracy_score(y_test, clf_1.predict(X_test)))
print('Log. Regression: ', accuracy_score(y_test, clf_2.predict(X_test)))
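A hedged side note (not in the original): with loss="log" the classifier is effectively a logistic regression trained by SGD, so it also exposes class-membership probabilities, while the hinge-loss model only provides decision scores.

# sketch: probability estimates are available only for the log-loss model
clf_2.predict_proba(X_test[:3])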
In [14]:
%timeit clf_1.fit(X_train, y_train)
In [15]:
%timeit clf_2.fit(X_train, y_train)
In [16]:
from sklearn.svm import SVR
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.datasets import load_boston
boston = load_boston()
X = StandardScaler().fit_transform(boston['data'])
y = boston['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101)
regr_1 = SVR(kernel='linear')
regr_1.fit(X_train, y_train)
regr_2 = SGDRegressor(random_state=101)
regr_2.fit(X_train, y_train)
print('SVR : ', mean_absolute_error(y_test, regr_1.predict(X_test)))
print('Lin. Regression: ', mean_absolute_error(y_test, regr_2.predict(X_test)))
In [17]:
from sklearn.tree import DecisionTreeRegressor
regr = DecisionTreeRegressor(random_state=101)
regr.fit(X_train, y_train)
mean_absolute_error(y_test, regr.predict(X_test))
Out[17]:
In [18]:
from sklearn.ensemble import RandomForestRegressor
regr = RandomForestRegressor(n_estimators=100,
                             n_jobs=-1, random_state=101)
regr.fit(X_train, y_train)
mean_absolute_error(y_test, regr.predict(X_test))
Out[18]:
In [19]:
sorted(zip(regr.feature_importances_, boston['feature_names']),
       key=lambda x: -x[0])
Out[19]:
In [20]:
from sklearn.ensemble import GradientBoostingRegressor
regr = GradientBoostingRegressor(n_estimators=500,
                                 learning_rate=0.01,
                                 random_state=101)
regr.fit(X_train, y_train)
mean_absolute_error(y_test, regr.predict(X_test))
Out[20]:
In [21]:
sorted(zip(regr.feature_importances_, boston['feature_names']),
       key=lambda x: -x[0])
Out[21]:
In [22]:
# Your own weak learner inside an ensemble
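As a hedged illustration of this placeholder (the estimator choice and max_depth value are assumptions, not from the original): any scikit-learn regressor can serve as the weak learner inside a bagging ensemble, for example a deliberately shallow decision tree.

from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
# sketch: a shallow tree (max_depth=3 is an illustrative choice) as the weak learner
weak_learner = DecisionTreeRegressor(max_depth=3, random_state=101)
bagging_tree = BaggingRegressor(weak_learner, n_estimators=100,
                                n_jobs=-1, random_state=101)
bagging_tree.fit(X_train, y_train)
mean_absolute_error(y_test, bagging_tree.predict(X_test))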
In [23]:
from sklearn.ensemble import BaggingRegressor
bagging = BaggingRegressor(SGDRegressor(), n_jobs=-1,
                           n_estimators=1000, random_state=101,
                           max_features=0.8)
bagging.fit(X_train, y_train)
mean_absolute_error(y_test, bagging.predict(X_test))
Out[23]:
In [24]:
from sklearn.ensemble import AdaBoostRegressor
booster = AdaBoostRegressor(SGDRegressor(), random_state=101,
                            n_estimators=100, learning_rate=0.01)
booster.fit(X_train, y_train)
mean_absolute_error(y_test, booster.predict(X_test))
Out[24]:
In [27]:
from sklearn.ensemble import GradientBoostingRegressor
regr = GradientBoostingRegressor(loss='lad',  # least absolute deviation
                                 n_estimators=500,
                                 learning_rate=0.1,
                                 random_state=101)
regr.fit(X_train, y_train)
mean_absolute_error(y_test, regr.predict(X_test))
Out[27]:
In [ ]: