Sklearn

sklearn.linear_model

linear_model:

  • RidgeClassifier
  • SGDClassifier
  • SGDRegressor
  • LinearRegression
  • LogisticRegression
  • Lasso
  • etc

In [ ]:
import numpy as np
from matplotlib.colors import ListedColormap
from sklearn import datasets, linear_model, metrics, model_selection

try:
    # sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed
    # in 0.20 (superseded by sklearn.model_selection). Keep the name importable
    # on old installs for any cell that still references it.
    from sklearn import cross_validation
except ImportError:
    pass

In [ ]:
# Enable inline figures and pull the pylab names into the namespace.
# Later cells use `pylab` and `figsize` without importing them, so they
# depend on this magic having been run first.
%pylab inline

Генерация данных


In [ ]:
# Synthetic two-cluster dataset with a fixed seed for reproducibility.
# blobs[0] holds the points, blobs[1] the class labels.
blobs = datasets.make_blobs(
    centers=2,
    cluster_std=5.5,
    random_state=1,
)

In [ ]:
# Colour map used to paint the two class labels red and blue.
colors = ListedColormap(['red', 'blue'])

# Pass the size as the `figsize` keyword. The previous `figsize(8, 8)` call
# invoked the IPython helper (which changes the global default figure size and
# returns None) and then handed that None to figure() as the figure number.
pylab.figure(figsize=(8, 8))

# blobs[0] is a 2-column array of points: plot column 0 against column 1,
# coloured by the class label in blobs[1].
pylab.scatter(blobs[0][:, 0], blobs[0][:, 1], c=blobs[1], cmap=colors)

In [ ]:
# Hold out 30% of the points for testing; the fixed seed keeps the split
# reproducible. train_test_split lives in sklearn.model_selection (the old
# sklearn.cross_validation module was removed in scikit-learn 0.20).
train_data, test_data, train_labels, test_labels = model_selection.train_test_split(
    blobs[0],
    blobs[1],
    test_size=0.3,
    random_state=1,
)

Линейная классификация

RidgeClassifier


In [ ]:
# Create the classifier object (it is not fitted at this point).
ridge_classifier = linear_model.RidgeClassifier(random_state=1)

In [ ]:
# Train the classifier on the training part of the split.
ridge_classifier.fit(train_data, train_labels)

In [ ]:
# Apply the trained classifier to the held-out test points.
ridge_predictions = ridge_classifier.predict(test_data)

In [ ]:
# True labels of the test set. The call form of print works on Python 2 and 3.
print(test_labels)

In [ ]:
# Labels predicted by the ridge classifier. The call form of print works on
# Python 2 and 3.
print(ridge_predictions)

In [ ]:
# Evaluate classification quality: accuracy of the ridge predictions against
# the true test labels.
metrics.accuracy_score(test_labels, ridge_predictions)

In [ ]:
# Show the coefficients learned by the fitted ridge classifier.
ridge_classifier.coef_

In [ ]:
# Show the intercept (bias term) learned by the fitted ridge classifier.
ridge_classifier.intercept_

LogisticRegression


In [ ]:
# Create the logistic-regression classifier (not fitted yet); the seed is fixed
# for reproducibility.
log_regressor = linear_model.LogisticRegression(random_state=1)

In [ ]:
# Train logistic regression on the training part of the split.
log_regressor.fit(train_data, train_labels)

In [ ]:
# Predicted class labels for the held-out test points.
lr_predictions = log_regressor.predict(test_data)

In [ ]:
# Per-class probability estimates for the held-out test points.
lr_proba_predictions = log_regressor.predict_proba(test_data)

In [ ]:
# True labels of the test set, for comparison with the predictions printed
# next. The call form of print works on Python 2 and 3.
print(test_labels)

In [ ]:
# Labels predicted by logistic regression. The call form of print works on
# Python 2 and 3.
print(lr_predictions)

In [ ]:
# Class probabilities predicted by logistic regression. The call form of print
# works on Python 2 and 3.
print(lr_proba_predictions)

In [ ]:
# Accuracy of logistic regression on the test set. The call form of print
# works on Python 2 and 3.
print(metrics.accuracy_score(test_labels, lr_predictions))

In [ ]:
# Accuracy of the ridge classifier on the same test set, for comparison. The
# call form of print works on Python 2 and 3.
print(metrics.accuracy_score(test_labels, ridge_predictions))

Оценка качества по cross-validation

cross_val_score


In [ ]:
# 10-fold cross-validated accuracy of the ridge classifier on the full dataset.
# cross_val_score lives in sklearn.model_selection (sklearn.cross_validation
# was removed in scikit-learn 0.20).
ridge_scoring = model_selection.cross_val_score(
    ridge_classifier,
    blobs[0],
    blobs[1],
    scoring='accuracy',
    cv=10,
)

In [ ]:
# 10-fold cross-validated accuracy of logistic regression on the full dataset.
# cross_val_score lives in sklearn.model_selection (sklearn.cross_validation
# was removed in scikit-learn 0.20).
lr_scoring = model_selection.cross_val_score(
    log_regressor,
    blobs[0],
    blobs[1],
    scoring='accuracy',
    cv=10,
)

In [ ]:
# Display the per-fold scores returned by cross_val_score for logistic regression.
lr_scoring

In [ ]:
# Summary statistics of the ridge cross-validation scores. The call form of
# print works on Python 2 and 3.
print('Ridge mean:{}, max:{}, min:{}, std:{}'.format(
    ridge_scoring.mean(),
    ridge_scoring.max(),
    ridge_scoring.min(),
    ridge_scoring.std(),
))

In [ ]:
# Summary statistics of the logistic-regression cross-validation scores. The
# call form of print works on Python 2 and 3.
print('Log mean:{}, max:{}, min:{}, std:{}'.format(
    lr_scoring.mean(),
    lr_scoring.max(),
    lr_scoring.min(),
    lr_scoring.std(),
))

cross_val_score с заданными scorer и cv_strategy


In [ ]:
# Wrap accuracy_score into a scorer object that can be passed as `scoring`.
scorer = metrics.make_scorer(metrics.accuracy_score)

In [ ]:
# 20 stratified random splits with 30% of the data held out in each one.
# The sklearn.model_selection splitter takes `n_splits` (formerly `n_iter`) and
# receives the data through split() rather than the constructor. Materialising
# the splits yields a plain list of (train_indices, test_indices) pairs, which
# cross_val_score accepts as its `cv` argument.
cv_strategy = list(
    model_selection.StratifiedShuffleSplit(
        n_splits=20,
        test_size=0.3,
        random_state=2,
    ).split(blobs[0], blobs[1])
)

In [ ]:
# Cross-validated score of the ridge classifier using the custom scorer and
# splitting strategy. cross_val_score lives in sklearn.model_selection
# (sklearn.cross_validation was removed in scikit-learn 0.20).
ridge_scoring = model_selection.cross_val_score(
    ridge_classifier,
    blobs[0],
    blobs[1],
    scoring=scorer,
    cv=cv_strategy,
)

In [ ]:
# Cross-validated score of logistic regression using the custom scorer and
# splitting strategy. cross_val_score lives in sklearn.model_selection
# (sklearn.cross_validation was removed in scikit-learn 0.20).
lr_scoring = model_selection.cross_val_score(
    log_regressor,
    blobs[0],
    blobs[1],
    scoring=scorer,
    cv=cv_strategy,
)

In [ ]:
# Summary statistics of the ridge scores over the custom splits. The call form
# of print works on Python 2 and 3.
print('Ridge mean:{}, max:{}, min:{}, std:{}'.format(
    ridge_scoring.mean(),
    ridge_scoring.max(),
    ridge_scoring.min(),
    ridge_scoring.std(),
))

In [ ]:
# Summary statistics of the logistic-regression scores over the custom splits.
# The call form of print works on Python 2 and 3.
print('Log mean:{}, max:{}, min:{}, std:{}'.format(
    lr_scoring.mean(),
    lr_scoring.max(),
    lr_scoring.min(),
    lr_scoring.std(),
))