In [ ]:
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# sklearn.model_selection is its replacement and still provides
# train_test_split.  It is aliased under the old name so the cells below
# that call cross_validation.train_test_split keep working unchanged.
from sklearn import datasets, linear_model, metrics
from sklearn import model_selection as cross_validation
from matplotlib.colors import ListedColormap
In [ ]:
# IPython magic: pulls numpy and matplotlib.pyplot into the notebook
# namespace (as `pylab`, used below) and renders figures inline.
# NOTE(review): %pylab is deprecated in modern IPython — the recommended
# replacement is `%matplotlib inline` plus explicit imports.
%pylab inline
In [ ]:
# Synthetic 2-class dataset: 2 informative features, no redundant ones,
# one cluster per class; random_state pins the sample for reproducibility.
clf_data, clf_target = datasets.make_classification(
    n_features=2, n_informative=2, n_redundant=0,
    n_classes=2, n_clusters_per_class=1, random_state=7)
In [ ]:
# Synthetic regression dataset: 2 features (only 1 informative), a single
# target, Gaussian noise with std 5; random_state pins the sample.
reg_data, reg_target = datasets.make_regression(
    n_features=2, n_targets=1, n_informative=1,
    noise=5.0, random_state=7)
In [ ]:
colors = ListedColormap(['red', 'blue'])
# make_classification returns a 2-D numpy array, so slice the columns
# directly.  The original map(lambda ...) worked only on Python 2: on
# Python 3 map() returns a lazy iterator, which pylab.scatter cannot plot.
pylab.scatter(clf_data[:, 0], clf_data[:, 1], c = clf_target, cmap = colors)
In [ ]:
# Plot each feature against the target (feature 1 in red, feature 0 in
# blue).  Column slicing replaces map(lambda ...), which on Python 3
# yields a lazy iterator that pylab.scatter cannot plot.
pylab.scatter(reg_data[:, 1], reg_target, color = 'r')
pylab.scatter(reg_data[:, 0], reg_target, color = 'b')
In [ ]:
# train_test_split moved from sklearn.cross_validation (removed in 0.20)
# to sklearn.model_selection; import it directly so this cell is
# self-contained.  30% of the objects are held out for testing.
from sklearn.model_selection import train_test_split

clf_train_data, clf_test_data, clf_train_labels, clf_test_labels = train_test_split(
    clf_data, clf_target, test_size = 0.3, random_state = 1)
In [ ]:
# Same 70/30 split for the regression problem; train_test_split is
# imported from model_selection because sklearn.cross_validation was
# removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split

reg_train_data, reg_test_data, reg_train_labels, reg_test_labels = train_test_split(
    reg_data, reg_target, test_size = 0.3, random_state = 1)
In [ ]:
# Linear classifier trained by SGD on the logistic loss, which makes
# predict_proba available below.  The loss name 'log' was renamed to
# 'log_loss' and the old spelling was removed in scikit-learn 1.3.
classifier = linear_model.SGDClassifier(loss = 'log_loss', random_state = 1)
In [ ]:
# Fit the SGD classifier on the classification training split.
classifier.fit(clf_train_data, clf_train_labels)
In [ ]:
# Hard class-label predictions for the held-out test objects.
predictions = classifier.predict(clf_test_data)
In [ ]:
# Per-class probability estimates; column 1 is the probability of class 1.
# predict_proba is available because the classifier uses the log loss.
probability_predictions = classifier.predict_proba(clf_test_data)
In [ ]:
# print() runs on both Python 2 and 3 for a single argument; the
# Python-2-only `print x` statement is a SyntaxError on Python 3.
print(clf_test_labels)
In [ ]:
# print() instead of the Python-2-only `print` statement.
print(predictions)
In [ ]:
# print() instead of the Python-2-only `print` statement.
print(probability_predictions)
In [ ]:
# Accuracy computed by hand: fraction of test objects whose predicted
# label equals the true one (same value as metrics.accuracy_score).
sum([1. if pair[0] == pair[1] else 0. for pair in zip(clf_test_labels, predictions)])/len(clf_test_labels)
In [ ]:
# Library accuracy — should match the manual computation above.
metrics.accuracy_score(clf_test_labels, predictions)
In [ ]:
# Confusion matrix: rows are true classes, columns are predicted classes.
matrix = metrics.confusion_matrix(clf_test_labels, predictions)
# print() so the cell also runs on Python 3 (`print x` is a SyntaxError there).
print(matrix)
In [ ]:
# Number of correctly classified test objects (equals the trace of the
# confusion matrix computed above).
sum(1 if actual == predicted else 0 for actual, predicted in zip(clf_test_labels, predictions))
In [ ]:
# The confusion-matrix diagonal holds the per-class correct counts, so
# its sum is the total number of correct predictions (cf. the cell above).
matrix.diagonal().sum()
In [ ]:
# Precision for class 0 treated as the "positive" class.
metrics.precision_score(clf_test_labels, predictions, pos_label = 0)
In [ ]:
# Precision with the default pos_label = 1 (class 1 is "positive").
metrics.precision_score(clf_test_labels, predictions)
In [ ]:
# Recall for class 0 treated as the "positive" class.
metrics.recall_score(clf_test_labels, predictions, pos_label = 0)
In [ ]:
# Recall with the default pos_label = 1.
metrics.recall_score(clf_test_labels, predictions)
In [ ]:
# F1 (harmonic mean of precision and recall) for class 0.
metrics.f1_score(clf_test_labels, predictions, pos_label = 0)
In [ ]:
# F1 with the default pos_label = 1.
metrics.f1_score(clf_test_labels, predictions)
In [ ]:
# Per-class precision/recall/F1/support in one text report; printed with
# print() so the cell also runs on Python 3.
print(metrics.classification_report(clf_test_labels, predictions))
In [ ]:
# ROC curve points: false/true positive rates over all probability
# thresholds, scoring with the predicted probability of class 1.
fpr, tpr, _ = metrics.roc_curve(clf_test_labels, probability_predictions[:,1])
In [ ]:
# ROC curve of the linear model against the diagonal of a random guesser.
# The two plot calls stay in this order so colors and legend order are
# unchanged.
pylab.plot(fpr, tpr, label='linear model')
pylab.plot([0, 1], [0, 1], '--', color='grey', label='random')
# Axis cosmetics: small margins around [0, 1] on both axes.
pylab.xlabel('False Positive Rate')
pylab.ylabel('True Positive Rate')
pylab.xlim([-0.05, 1.05])
pylab.ylim([-0.05, 1.05])
pylab.title('ROC curve')
pylab.legend(loc="lower right")
In [ ]:
# ROC AUC computed from hard 0/1 predictions — only a single operating
# point, so it understates the model compared to the probability version.
metrics.roc_auc_score(clf_test_labels, predictions)
In [ ]:
# ROC AUC from probability scores — the proper way to compute it.
metrics.roc_auc_score(clf_test_labels, probability_predictions[:,1])
In [ ]:
# Area under the precision-recall curve.
metrics.average_precision_score(clf_test_labels, predictions)
In [ ]:
# Log loss, scored on the predicted probability of class 1.
metrics.log_loss(clf_test_labels, probability_predictions[:,1])
In [ ]:
# Linear regression fit by SGD with at most 20 passes over the data.
# SGDRegressor's n_iter parameter was renamed to max_iter and removed
# in scikit-learn 0.21.
regressor = linear_model.SGDRegressor(random_state = 1, max_iter = 20)
In [ ]:
# Fit the SGD regressor on the regression training split.
regressor.fit(reg_train_data, reg_train_labels)
In [ ]:
# Real-valued predictions for the held-out regression test objects.
reg_predictions = regressor.predict(reg_test_data)
In [ ]:
# print() instead of the Python-2-only `print` statement.
print(reg_test_labels)
In [ ]:
# print() instead of the Python-2-only `print` statement.
print(reg_predictions)
In [ ]:
# Mean absolute error between true targets and predictions.
metrics.mean_absolute_error(reg_test_labels, reg_predictions)
In [ ]:
# Mean squared error — penalizes large deviations quadratically.
metrics.mean_squared_error(reg_test_labels, reg_predictions)
In [ ]:
# RMSE; `sqrt` comes from the %pylab namespace (numpy.sqrt) — it is not
# imported explicitly anywhere in this file.
sqrt(metrics.mean_squared_error(reg_test_labels, reg_predictions))
In [ ]:
# Coefficient of determination R^2: 1.0 is a perfect fit; it can be
# negative for models worse than predicting the mean.
metrics.r2_score(reg_test_labels, reg_predictions)