In [1]:
import numpy as np
import pylab as pl
from sklearn import svm, datasets
from sklearn.utils import shuffle
from sklearn.metrics import roc_curve, auc

# Seeded generator shared by the noise features and the shuffle below
random_state = np.random.RandomState(0)

# Load the iris measurements together with their class labels
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Keep only the samples whose label is not 2, leaving a binary problem
keep = y != 2
X = X[keep]
y = y[keep]
n_samples, n_features = X.shape

# Append 200 * n_features columns of Gaussian noise to make the problem harder
noise = random_state.randn(n_samples, 200 * n_features)
X = np.hstack([X, noise])

# Shuffle the rows, then use the first half for training and the rest for testing
X, y = shuffle(X, y, random_state=random_state)
half = n_samples // 2
X_train, y_train = X[:half], y[:half]
X_test, y_test = X[half:], y[half:]

In [2]:
# Show the split index: rows before it form the training set, rows from it onward the test set
half


Out[2]:
50

In [4]:
# Train a linear-kernel SVM with probability estimates enabled on the
# training half, then compute class probabilities for the test half
classifier = svm.SVC(kernel='linear', probability=True)
classifier.fit(X_train, y_train)
probas_ = classifier.predict_proba(X_test)

In [ ]:
# Compute the ROC curve and the area under the curve (AUC).
# probas_[:, 1] is the predicted probability for the second class, used as
# the ranking score for roc_curve.
fpr, tpr, thresholds = roc_curve(y_test, probas_[:, 1])
roc_auc = auc(fpr, tpr)
# Parenthesised single-argument form: valid on Python 3 (where the bare
# `print "..."` statement is a SyntaxError) and prints the same text on Python 2.
print("Area under the ROC curve : %f" % roc_auc)

In [ ]:
# Draw the ROC curve against the diagonal reference line
roc_label = 'ROC curve (area = %0.2f)' % roc_auc
unit_interval = [0.0, 1.0]

pl.clf()  # start from an empty figure
pl.plot(fpr, tpr, label=roc_label)
pl.plot([0, 1], [0, 1], 'k--')  # dashed black diagonal from (0, 0) to (1, 1)

# Both rates are fractions, so clamp both axes to [0, 1]
pl.xlim(unit_interval)
pl.ylim(unit_interval)

# Axis labels, title and legend
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Receiver operating characteristic example')
pl.legend(loc="lower right")
pl.show()

In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
# Build a 1000 x 5 frame of standard-normal draws in columns 'a'..'e'.
# A dedicated seeded generator makes the values (and every correlation table
# computed from them) reproducible after a kernel restart, and leaves NumPy's
# global random state untouched.
frame_rng = np.random.RandomState(0)
frame = pd.DataFrame(frame_rng.randn(1000, 5), columns=['a', 'b', 'c', 'd', 'e'])

In [4]:
# Pairwise Pearson (linear) correlation between the columns of `frame`.
# The columns are random normal draws, so off-diagonal values are expected to be close to 0.
frame.corr(method="pearson")


Out[4]:
a b c d e
a 1.000000 0.002065 0.009580 0.013859 0.048539
b 0.002065 1.000000 0.050399 -0.035548 -0.038536
c 0.009580 0.050399 1.000000 -0.015307 0.019464
d 0.013859 -0.035548 -0.015307 1.000000 0.031843
e 0.048539 -0.038536 0.019464 0.031843 1.000000

In [5]:
# Pairwise Spearman rank correlation between the columns of `frame`
# (correlation computed on ranks rather than on the raw values).
frame.corr(method="spearman")


Out[5]:
a b c d e
a 1.000000 0.000145 0.013891 0.025213 0.050536
b 0.000145 1.000000 0.052310 -0.053390 -0.027948
c 0.013891 0.052310 1.000000 0.003087 0.026272
d 0.025213 -0.053390 0.003087 1.000000 0.026370
e 0.050536 -0.027948 0.026272 0.026370 1.000000

In [6]:
# Pairwise Kendall tau correlation between the columns of `frame`
# (a rank-based measure built from concordant and discordant pairs).
frame.corr(method="kendall")


Out[6]:
a b c d e
a 1.000000 0.000064 0.009389 0.017554 0.033838
b 0.000064 1.000000 0.034663 -0.036332 -0.018302
c 0.009389 0.034663 1.000000 0.003183 0.018026
d 0.017554 -0.036332 0.003183 1.000000 0.017814
e 0.033838 -0.018302 0.018026 0.017814 1.000000

In [ ]: