In [1]:
%matplotlib inline
import pylab as pl
import numpy as np
import scipy as sp
import pandas as pd
from datetime import datetime, timedelta
import util
In [2]:
# Load the cached training feature matrix and labels through the project-local
# `util` helpers; both cache names carry the same timestamp suffix.
# NOTE(review): the types returned by `util.fetch` are not visible here -- the
# cells below index `X` like a 2-D NumPy array; confirm.
X = util.fetch(util.cache_path('train_X_before_2014-08-01_22-00-47'))
y = util.fetch(util.cache_path('train_y_before_2014-08-01_22-00-47'))
In [3]:
# Binarise the first 45 columns: every strictly positive entry becomes 1.
# The boolean mask is only 45 columns wide, so it has to index the matching
# 45-column slice. Applying it to the full-width `X` is a shape mismatch that
# current NumPy rejects with IndexError (old releases merely tolerated it).
# `X[:, :45]` is a basic slice, so writing through it updates `X` in place
# (assumes `X` is a NumPy ndarray -- TODO confirm).
first_cols = X[:, :45]
first_cols[first_cols > 0] = 1.
In [4]:
# Rescale column 47 by a fixed divisor.
# NOTE(review): 3 is presumably the largest value this feature can take --
# confirm against the feature definition.
# Not idempotent: re-running this cell divides the column again.
col_47_divisor = 3.
X[:, 47] = X[:, 47] / col_47_divisor
In [5]:
# Rescale column 48 by a fixed divisor.
# NOTE(review): 6 is presumably the largest value this feature can take --
# confirm against the feature definition.
# Not idempotent: re-running this cell divides the column again.
col_48_divisor = 6.
X[:, 48] = X[:, 48] / col_48_divisor
In [6]:
# Rescale column 49 by a fixed divisor (column 50 is divided by the same value
# in the next cell).
# NOTE(review): the meaning of 20104 is not visible in this notebook --
# presumably a dataset-wide count or maximum; confirm.
# Not idempotent: re-running this cell divides the column again.
col_49_divisor = 20104.
X[:, 49] = X[:, 49] / col_49_divisor
In [7]:
# Rescale column 50 by a fixed divisor (the same value applied to column 49 in
# the previous cell).
# NOTE(review): the meaning of 20104 is not visible in this notebook --
# presumably a dataset-wide count or maximum; confirm.
# Not idempotent: re-running this cell divides the column again.
col_50_divisor = 20104.
X[:, 50] = X[:, 50] / col_50_divisor
In [8]:
# Max-normalise every column from index 141 onward: each column is divided by
# its own maximum.
tail_cols = X[:, 141:]
# Take the maxima over the tail slice only; the leading 141 columns are not
# needed for this step.
tail_max = tail_cols.max(axis=0)
# A column whose maximum is 0 would divide by zero and fill the column with
# NaN/inf; dividing by 1 instead leaves such a column unchanged.
tail_max[tail_max == 0] = 1.
X[:, 141:] = tail_cols / tail_max
In [9]:
# Max-normalise every fifth column from 55 through 95 inclusive
# (55, 60, ..., 95): each is divided by its own maximum.
for col in range(55, 96, 5):
    col_max = np.max(X[:, col])
    # A zero maximum would divide by zero and fill the column with NaN/inf,
    # so such a column is left untouched.
    if col_max != 0:
        X[:, col] = X[:, col] / col_max
In [10]:
from sklearn.svm import LinearSVC
from sklearn.cross_validation import cross_val_score, StratifiedKFold
In [11]:
# Linear SVM configured with dual=False and class_weight='auto'.
# NOTE(review): class_weight='auto' was deprecated in later scikit-learn
# releases in favour of 'balanced' (which weights classes differently) --
# confirm against the installed version before changing it.
svc = LinearSVC(
    class_weight='auto',
    dual=False
)
In [12]:
# ROC AUC of the linear SVM on each of 5 stratified folds; the call is the
# cell's last expression so the array of scores is displayed.
# NOTE(review): `StratifiedKFold(y, 5)` is the old `sklearn.cross_validation`
# signature; `sklearn.model_selection` uses `StratifiedKFold(n_splits=5)`.
cross_val_score(
    svc, X, y,
    cv=StratifiedKFold(y, 5),
    scoring='roc_auc'
)
Out[12]:
In [13]:
# Fit the SVM on the full training set; its coefficients are inspected in a
# later cell.
svc.fit(X, y)
Out[13]:
In [14]:
from sklearn.metrics import roc_auc_score
In [15]:
from sklearn.calibration import CalibratedClassifierCV
# Wrap the linear SVM in an isotonic calibrator fitted with 5 stratified folds
# so that it exposes `predict_proba`.
calibration_folds = StratifiedKFold(y, 5)
clf = CalibratedClassifierCV(svc, method='isotonic', cv=calibration_folds)
clf.fit(X, y)
# ROC AUC using column 1 of `predict_proba`, computed on the same data the
# model was fitted on -- NOTE(review): this is a training-set score and is
# likely optimistic compared with the cross-validated scores above.
svc_positive_proba = clf.predict_proba(X)[:, 1]
roc_auc_score(y, svc_positive_proba)
Out[15]:
In [16]:
# Show the first row of the SVM's coefficients restricted to features 0-44,
# i.e. the columns that were binarised earlier in the notebook.
svc.coef_[0][0:45]
Out[16]:
In [17]:
from sklearn.linear_model import LogisticRegressionCV
# Logistic regression with built-in 5-fold cross-validation scored by ROC AUC.
lr = LogisticRegressionCV(scoring='roc_auc', cv=5)
lr.fit(X, y)
# ROC AUC using column 1 of `predict_proba`, computed on the same data the
# model was fitted on -- NOTE(review): this is a training-set score, not a
# held-out estimate.
lr_positive_proba = lr.predict_proba(X)[:, 1]
roc_auc_score(y, lr_positive_proba)
Out[17]:
In [19]:
# Show the first row of the logistic-regression coefficients restricted to
# features 0-44, the same range inspected for the SVM above.
lr.coef_[0][0:45]
Out[19]:
In [ ]: