linear_model:
In [ ]:
import numpy as np
from matplotlib.colors import ListedColormap
# NOTE: `cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `model_selection` (available since 0.18) is its replacement.
from sklearn import cross_validation, datasets, linear_model, metrics
from sklearn import model_selection
In [ ]:
# IPython magic (not plain Python). Later cells use `pylab` and `figsize`
# without importing them, so they rely on this line to populate the namespace
# and, presumably, to render figures inline -- confirm against the IPython docs.
%pylab inline
In [ ]:
# Synthetic two-centre dataset; the fixed seed makes the draw reproducible.
# Later cells index the result as blobs[0] (samples) and blobs[1] (labels).
blobs = datasets.make_blobs(
    centers=2,
    cluster_std=5.5,
    random_state=1,
)
In [ ]:
# Scatter-plot the first two features of every sample, coloured by class
# label with a two-colour map.
colors = ListedColormap(['red', 'blue'])
# Size this figure through the `figsize` keyword. The previous form passed
# the return value of the pylab-injected figsize() helper as the positional
# `num` argument of figure().
pylab.figure(figsize=(8, 8))
# Column slices replace the per-row list comprehensions.
pylab.scatter(blobs[0][:, 0], blobs[0][:, 1], c=blobs[1], cmap=colors)
In [ ]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible. `model_selection` replaces the `cross_validation` module,
# which was deprecated in scikit-learn 0.18 and removed in 0.20.
train_data, test_data, train_labels, test_labels = model_selection.train_test_split(
    blobs[0], blobs[1],
    test_size=0.3,
    random_state=1,
)
In [ ]:
# Create the classifier object.
ridge_classifier = linear_model.RidgeClassifier(random_state=1)
In [ ]:
# Train the classifier on the training part of the split.
ridge_classifier.fit(X=train_data, y=train_labels)
In [ ]:
# Apply the trained classifier to the held-out samples.
ridge_predictions = ridge_classifier.predict(X=test_data)
In [ ]:
# Show the true labels of the held-out samples.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(test_labels)
In [ ]:
# Show the ridge classifier's predicted labels for the held-out samples.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(ridge_predictions)
In [ ]:
# Evaluate classification quality: accuracy of the ridge predictions
# against the true held-out labels (displayed as the cell output).
metrics.accuracy_score(y_true=test_labels, y_pred=ridge_predictions)
In [ ]:
# Show the `coef_` attribute of the fitted ridge classifier as the cell output.
ridge_classifier.coef_
In [ ]:
# Show the `intercept_` attribute of the fitted ridge classifier as the cell output.
ridge_classifier.intercept_
In [ ]:
# Create the logistic-regression classifier with a fixed seed.
log_regressor = linear_model.LogisticRegression(random_state=1)
In [ ]:
# Train the logistic-regression model on the training part of the split.
log_regressor.fit(X=train_data, y=train_labels)
In [ ]:
# Predicted class labels for the held-out samples.
lr_predictions = log_regressor.predict(X=test_data)
In [ ]:
# Output of predict_proba() for the held-out samples.
lr_proba_predictions = log_regressor.predict_proba(X=test_data)
In [ ]:
# Show the true labels again for comparison with the logistic predictions.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(test_labels)
In [ ]:
# Show the logistic-regression predicted labels.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(lr_predictions)
In [ ]:
# Show the predict_proba() output of the logistic-regression model.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(lr_proba_predictions)
In [ ]:
# Accuracy of the logistic-regression predictions on the held-out samples.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(metrics.accuracy_score(test_labels, lr_predictions))
In [ ]:
# Accuracy of the ridge predictions on the same held-out samples.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(metrics.accuracy_score(test_labels, ridge_predictions))
In [ ]:
# 10-fold cross-validated accuracy of the ridge classifier on the full
# dataset. Uses `model_selection`, the replacement for the
# `cross_validation` module removed in scikit-learn 0.20.
ridge_scoring = model_selection.cross_val_score(
    ridge_classifier, blobs[0], blobs[1], scoring='accuracy', cv=10)
In [ ]:
# 10-fold cross-validated accuracy of the logistic-regression model on the
# full dataset. Uses `model_selection`, the replacement for the
# `cross_validation` module removed in scikit-learn 0.20.
lr_scoring = model_selection.cross_val_score(
    log_regressor, blobs[0], blobs[1], scoring='accuracy', cv=10)
In [ ]:
# Show the per-split accuracy scores of the logistic model as the cell output.
lr_scoring
In [ ]:
# Summary statistics of the ridge cross-validation scores.
# print() with a single argument behaves the same on Python 2 and Python 3.
print('Ridge mean:{}, max:{}, min:{}, std:{}'.format(
    ridge_scoring.mean(), ridge_scoring.max(),
    ridge_scoring.min(), ridge_scoring.std()))
In [ ]:
# Summary statistics of the logistic-regression cross-validation scores.
# print() with a single argument behaves the same on Python 2 and Python 3.
print('Log mean:{}, max:{}, min:{}, std:{}'.format(
    lr_scoring.mean(), lr_scoring.max(),
    lr_scoring.min(), lr_scoring.std()))
In [ ]:
# Wrap accuracy_score as a scorer object that can be passed as `scoring=`.
scorer = metrics.make_scorer(score_func=metrics.accuracy_score)
In [ ]:
cv_strategy = cross_validation.StratifiedShuffleSplit(blobs[1], n_iter = 20 , test_size = 0.3, random_state = 2)
In [ ]:
ridge_scoring = cross_validation.cross_val_score(ridge_classifier, blobs[0], blobs[1], scoring = scorer, cv = cv_strategy)
In [ ]:
lr_scoring = cross_validation.cross_val_score(log_regressor, blobs[0], blobs[1], scoring = scorer, cv = cv_strategy)
In [ ]:
# Summary statistics of the current `ridge_scoring` values.
# print() with a single argument behaves the same on Python 2 and Python 3.
print('Ridge mean:{}, max:{}, min:{}, std:{}'.format(
    ridge_scoring.mean(), ridge_scoring.max(),
    ridge_scoring.min(), ridge_scoring.std()))
In [ ]:
# Summary statistics of the current `lr_scoring` values.
# print() with a single argument behaves the same on Python 2 and Python 3.
print('Log mean:{}, max:{}, min:{}, std:{}'.format(
    lr_scoring.mean(), lr_scoring.max(),
    lr_scoring.min(), lr_scoring.std()))