In [1]:
# This is the first executed cell, so `pd` has to be imported here for the
# notebook to survive Restart Kernel -> Run All.
import pandas as pd

# Load the training set; the CSV is read relative to the notebook's working directory.
df = pd.read_csv('numerai_training_data.csv')

# Split the frame into predictors (everything except 'target') and the label column.
# NOTE(review): assumes every non-'target' column is a usable numeric feature -- confirm
# against the CSV schema before trusting the scores below.
X = df.drop('target', axis=1)
y = df['target']

# Parenthesised form prints identically on Python 2 and is valid on Python 3.
print(X.shape)

# A notebook only renders the final expression of a cell, so the random preview
# goes last; placed mid-cell its result would be computed and thrown away.
df.sample(5)
In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# Baseline model: logistic regression with library defaults, evaluated by
# 4-fold cross-validation with the folds fitted in 4 parallel jobs.
clf = LogisticRegression()
scores = cross_val_score(clf, X, y, scoring='neg_log_loss', cv=4, n_jobs=4)

# 'neg_log_loss' yields the negated log loss per fold (scikit-learn scorers follow
# a greater-is-better convention), so the sign of the mean is flipped to report
# the conventional log loss, where lower is better.
print(scores)
# cross_val_score returns an ndarray, so its own .mean() gives the same value as
# np.mean(scores) without relying on `np` having been imported somewhere else.
print(-scores.mean())
In [3]:
from xgboost import XGBClassifier

# Gradient-boosted trees from xgboost (100 boosting rounds), scored with the same
# 4-fold cross-validation call as the other models so the numbers are comparable.
# `cross_val_score`, `X` and `y` come from earlier cells.
clf = XGBClassifier(n_estimators=100)
scores = cross_val_score(clf, X, y, scoring='neg_log_loss', cv=4, n_jobs=4)

# Per-fold values are negated log losses; negate the mean to get the log loss itself.
print(scores)
# ndarray.mean() avoids depending on an `np` import that no visible cell performs.
print(-scores.mean())
In [4]:
from sklearn.ensemble import GradientBoostingClassifier

# scikit-learn's gradient boosting (100 stages), scored with the same 4-fold
# cross-validation call as the other models. `cross_val_score`, `X` and `y`
# come from earlier cells.
# random_state is pinned so that re-running the cell reproduces the same scores;
# without it the estimator seeds itself differently on every run.
clf = GradientBoostingClassifier(n_estimators=100, random_state=0)
scores = cross_val_score(clf, X, y, scoring='neg_log_loss', cv=4, n_jobs=4)

# Per-fold values are negated log losses; negate the mean to get the log loss itself.
print(scores)
# ndarray.mean() avoids depending on an `np` import that no visible cell performs.
print(-scores.mean())