RUN PREDICTION (Gradient Boosting Model)


In [16]:
import pandas as pd
import pickle

LOAD TEST DATA


In [10]:
# Load the Kaggle test set, keep the ids for the output file,
# and one-hot encode the categorical features.
df = pd.read_csv('https://s3.amazonaws.com/marweezys-bucket/all_state_insurance_prediction/test.csv')
ids = list(df['id'])
df.drop(labels='id', axis=1, inplace=True)
one_hot_df = pd.get_dummies(df)
# cat116_W appears in the training data but not in the test data,
# so add it manually (all zeros) to match the training feature set.
one_hot_df['cat116_W'] = 0
one_hot_df.head()


Out[10]:
cont1 cont2 cont3 cont4 cont5 cont6 cont7 cont8 cont9 cont10 ... cat116_MX cat116_N cat116_O cat116_Q cat116_R cat116_S cat116_T cat116_U cat116_Y cat116_W
0 0.321594 0.299102 0.246911 0.402922 0.281143 0.466591 0.317681 0.61229 0.34365 0.38016 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0
1 0.634734 0.620805 0.654310 0.946616 0.836443 0.482425 0.443760 0.71330 0.51890 0.60401 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0
2 0.290813 0.737068 0.711159 0.412789 0.718531 0.212308 0.325779 0.29758 0.34365 0.30529 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0
3 0.268622 0.681761 0.592681 0.354893 0.397069 0.369930 0.342355 0.40028 0.33237 0.31480 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0
4 0.553846 0.299102 0.263570 0.696873 0.302678 0.398862 0.391833 0.23688 0.43731 0.50556 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0

5 rows × 1117 columns
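The manual cat116_W fix works here, but a more general approach is to reindex the encoded test frame against the training column list. The sketch below is not part of the original notebook; train_columns is a hypothetical list of the one-hot column names seen during training (the pickled GB_feats list loaded in the next cell serves the same purpose).

# Sketch: align one-hot encoded test columns with the training columns.
# reindex adds any training-only dummies (filled with 0) and drops
# test-only ones, so one-off fixes like cat116_W are not needed.
def align_to_training(one_hot_df, train_columns):
    return one_hot_df.reindex(columns=train_columns, fill_value=0)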

LOAD MODEL


In [11]:
# Load the pickled feature list and the trained gradient boosting model.
with open('GB_feats.plk', 'rb') as f:
    feats = pickle.load(f)
with open('GB_model.plk', 'rb') as f:
    model = pickle.load(f)
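Before selecting columns with the pickled feature list, a quick sanity check (a sketch, not in the original notebook) can flag any training features missing from the encoded test frame:

# Sketch: verify every pickled feature name exists in the test frame.
missing = set(feats) - set(one_hot_df.columns)
if missing:
    print('Missing from test data:', sorted(missing)[:10])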

MAKE PREDICTIONS


In [13]:
# Select the training-time feature columns in the expected order, then predict.
X = one_hot_df[feats]
y_pred = model.predict(X)

SAVE/SHOW PREDICTIONS


In [19]:
# Assemble predictions into an id/loss table and save to CSV.
GB_d = {}
GB_d['id'] = ids
GB_d['loss'] = y_pred
GB_df = pd.DataFrame(GB_d)
GB_df.to_csv('GB_preds.csv', sep=',', index=False)
GB_df.head()


Out[19]:
id loss
0 4 1579.628558
1 6 2265.133652
2 9 9989.328296
3 12 6191.534165
4 15 1151.990087
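As a final check (a sketch, not part of the original run), the saved file can be read back to confirm it has the expected id/loss layout and row count before it is submitted:

# Sketch: reload the saved predictions and confirm columns and row count.
check_df = pd.read_csv('GB_preds.csv')
assert list(check_df.columns) == ['id', 'loss']
assert len(check_df) == len(ids)
check_df.head()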