In [2]:
import sys
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split

In [3]:
cd kaggle


/Users/martiom/kaggle

In [4]:
# Load the pre-processed competition data from the working directory.
train_file, test_file = 'train_processed.csv', 'test_processed.csv'
train_df = pd.read_csv(train_file)
test_df = pd.read_csv(test_file)

# Label column, and the columns that must not be fed to a model as inputs
# (the label itself and the 'QuoteNumber' column).
target_col = 'QuoteConversion_Flag'
excluded_cols = {target_col, 'QuoteNumber'}
features_col = [col for col in train_df.columns if col not in excluded_cols]

In [5]:
# Hold out 20% of the labelled rows for validation.  The split is seeded so
# the same rows land in each partition on every run; without random_state the
# scores reported further down change on every Restart & Run All.
SPLIT_SEED = 42
training, testing = train_test_split(train_df, train_size=0.8, random_state=SPLIT_SEED)

# Summarise both partitions (row counts, dtypes, memory footprint).
training.info()
testing.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 208602 entries, 61019 to 237179
Columns: 604 entries, Field7 to QuoteNumber
dtypes: float64(602), int64(2)
memory usage: 962.9 MB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 52151 entries, 155387 to 142363
Columns: 604 entries, Field7 to QuoteNumber
dtypes: float64(602), int64(2)
memory usage: 240.7 MB

In [33]:
%%timeit -n 1 -r 1
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier as GBC
rl_clf = GBC(n_estimators = 500) #RandomForestClassifier(n_estimators=50)

rl_clf.fit(training[features_col], training[target_col])


0.920442561025
1 loops, best of 1: 11min 44s per loop

In [34]:
# Mean accuracy of the fitted classifier, first on the rows it was trained on
# and then on the held-out rows (one value printed per line, in that order).
for split in (training, testing):
    print(rl_clf.score(split[features_col], split[target_col]))


0.922992109376

In [11]:
# Per-class probability predictions for the competition test set.
# NOTE(review): no visible cell defines `t`; it is reconstructed here as
# predict_proba on rl_clf, which matches the two-column shape shown in this
# cell's output and the `t[:,1]` use in the submission cell -- confirm which
# fitted model was originally intended.
t = rl_clf.predict_proba(test_df[features_col])
t.shape


Out[11]:
(173836, 2)

In [12]:
# Attach the second column of `t` to the test frame as the predicted flag,
# then write the two submission columns to disk without the index.
submission_columns = ['QuoteNumber', 'QuoteConversion_Flag']
test_df['QuoteConversion_Flag'] = t[:, 1]
submission = test_df[submission_columns]
submission.to_csv('submission.csv', index=False)

In [13]:
import xgboost as xgb

In [31]:
%%timeit -n 1 -r 1 
# xgb_clf = xgb.XGBClassifier(n_estimators = 100, nthread=-1, max_depth=4, learning_rate=0.05,subsample=0.5,colsample_bytree=0.8)

xgb_clf = xgb.XGBClassifier()
xgb_model = xgb_clf.fit(training[features_col], training[target_col], eval_metric = "auc")


1 loops, best of 1: 5min 18s per loop

In [32]:
# Score the XGBoost classifier on the held-out partition; the value of the
# final expression is shown as the cell output.
holdout_features = testing[features_col]
holdout_labels = testing[target_col]
xgb_clf.score(holdout_features, holdout_labels)


Out[32]:
0.91674176909359362