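This notebook trains a price regressor on medium-interest listings only, then uses it to estimate, for every listing, the price it would be expected to have if it drew medium interest. The idea is to give the model a reference price to compare the actual price against (a framing effect).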


In [4]:
import os
import sys
import operator
import numpy as np
import pandas as pd
from scipy import sparse
import xgboost as xgb
import random
from sklearn import model_selection, preprocessing, ensemble
from sklearn.metrics import log_loss
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

import pickle

import sklearn.cluster

import Levenshtein

from multiprocessing import Pool

In [5]:
# Load the frames and the feature-name list written by the data-prep step.
train_df = pd.read_pickle('fin-dprep-train.pkl')
test_df = pd.read_pickle('fin-dprep-test.pkl')

# Use a context manager so the file handle is closed as soon as the list is read.
# NOTE: pickle can execute arbitrary code on load; only open files you produced yourself.
with open('fin-dprep-flist.pkl', 'rb') as flist_file:
    features_to_use = pickle.load(flist_file)

In [6]:
# Cap the training price to the range [0, 13000] so extreme listings do not dominate the regression target.
train_df['price'] = train_df['price'].clip(lower=0, upper=13000)

In [7]:
class MeansProcessor:
    """Encode a grouping key by per-group statistics of a target column.

    ``fit`` records, for every group of ``key``, the row count plus the mean
    and standard deviation of ``tgt``. ``predict`` writes those statistics onto
    another frame as ``<outkey>_level``, ``<outkey>_level_std`` and
    ``<outkey>_count``.
    """

    def __init__(self, key, outkey = None, tgt = 'interest_cat'):
        """
        Parameters
        ----------
        key : column name, or list of column names, to group by.
        outkey : prefix of the generated columns; defaults to ``key``
            (so it must be given explicitly when ``key`` is a list).
        tgt : name of the column whose statistics are computed.
        """
        self.key = key
        self.outkey = key if outkey is None else outkey

        # Per-group statistics, keyed by the group label; filled in by fit().
        self.count = {}
        self.means = {}
        self.std = {}
        self.global_means = 0

        self.tgt = tgt

        self.outkeys = [self.outkey + '_level', self.outkey + '_level_std']

    def fit(self, df):
        """Learn count / mean / std of ``tgt`` for every group of ``key`` in ``df``."""
        # Start from a clean slate so refitting never keeps groups from an earlier frame.
        self.count = {}
        self.means = {}
        self.std = {}

        self.global_means = df[self.tgt].mean()

        for group_key, group in df.groupby(self.key, sort=False):
            target = group[self.tgt]

            self.count[group_key] = len(group)
            self.means[group_key] = np.mean(target)
            # np.std defaults to ddof=0, i.e. the population standard deviation.
            self.std[group_key] = np.std(target)

    def predict(self, df):
        """Add the learned statistics to ``df`` and return it.

        ``df`` is modified in place. Rows whose group was not seen during
        ``fit`` keep NaN for the level columns and 0 for the count column.
        """
        for column in self.outkeys:
            df[column] = np.nan # self.global_means[l]

        count_column = self.outkey + '_count'
        df[count_column] = 0

        for group_key, group in df.groupby(self.key, sort=False):
            # A key equal to 0 is skipped and keeps the defaults set above.
            # NOTE(review): presumably 0 marks a missing id - confirm against the data prep.
            if group_key == 0:
                continue

            if group_key in self.means:
                df.loc[group.index, count_column] = self.count[group_key]
                df.loc[group.index, self.outkey + '_level'] = self.means[group_key]
                df.loc[group.index, self.outkey + '_level_std'] = self.std[group_key]

        return df

    def get_features(self):
        """Return the names of all columns that ``predict`` adds."""
        return self.outkeys.copy() + [self.outkey + '_count']

# I kept the same index randomization (with a fixed seed) so that I could validate this code
# against the original.

# Ordinal encoding of the interest label; train_y is used to stratify the folds.
target_num_map = dict(low=0, medium=1, high=2)
interest_codes = train_df['interest_level'].apply(lambda level: target_num_map[level])
train_y = np.array(interest_codes)

def proc_fold(fold):
    """Build out-of-fold group features for one CV fold.

    ``fold`` is a ``(train_index, test_index)`` pair of positional row indices
    into the global ``train_df``. Building, manager and building+manager
    encoders are fit on the fold's training rows and applied to its validation
    rows and to a copy of the global ``test_df``.

    Returns ``(cv_train, cv_valid, cv_test)``.
    """
    train_index, test_index = fold[0], fold[1]

    cv_train = train_df.iloc[train_index]
    cv_valid = train_df.iloc[test_index][['interest_level', 'manager_id', 'building_id']]
    cv_test = test_df.copy()

    # (grouping key, output column prefix) for each encoder, applied in this order.
    encoder_specs = (
        ('building_id', 'building_sort'),
        ('manager_id', 'manager_sort'),
        (['building_id', 'manager_id'], 'mb_comb'),
    )

    for group_key, prefix in encoder_specs:
        encoder = MeansProcessor(group_key, prefix)
        # Statistics come from the training rows only, so validation rows never see their own target.
        encoder.fit(cv_train)
        cv_valid = encoder.predict(cv_valid)
        cv_test = encoder.predict(cv_test)

    return cv_train, cv_valid, cv_test

# Seeded, stratified folds so the out-of-fold group features are reproducible.
kf = model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=2016)
folds = [(k[0], k[1]) for k in kf.split(list(range(train_df.shape[0])), train_y)]

GROUP_FEATURE_CACHE = '0420-model-groupfeatures.pkl'

# Reuse the cached per-fold results when they can be read; otherwise recompute
# them in parallel and refresh the cache.
# NOTE: the cache is not invalidated when the input data changes - delete the
# file to force a recompute.
try:
    with open(GROUP_FEATURE_CACHE, 'rb') as cache_file:
        rv = pickle.load(cache_file)
except Exception:
    # `Exception` (rather than a bare except) keeps the best-effort fallback
    # without swallowing KeyboardInterrupt / SystemExit.
    with Pool(5) as pool:
        rv = pool.map(proc_fold, folds)

    with open(GROUP_FEATURE_CACHE, 'wb') as cache_file:
        pickle.dump(rv, cache_file)

# dummies to get feature id's
m_build = MeansProcessor('building_id', 'building_sort')
m_mgr = MeansProcessor('manager_id', 'manager_sort')
m_comb = MeansProcessor(['building_id', 'manager_id'], 'mb_comb')

group_features = m_build.get_features() + m_mgr.get_features() + m_comb.get_features()

# One test frame per fold, each carrying the group features fit on that fold's training rows.
cv_test = [
    test_df.merge(r[2][group_features], left_index=True, right_index=True)
    for r in rv
]

# Each training row appears in exactly one validation fold, so concatenating the
# validation frames gives out-of-fold group features for the whole training set.
cv_allvalid = pd.concat([r[1] for r in rv])

# NOTE: this rebinds train_df; re-running the cell merges the columns a second time.
train_df = train_df.merge(cv_allvalid[group_features], left_index=True, right_index=True)

In [8]:
#fl = features_to_use + m_build.get_features() + m_mgr.get_features() + m_comb.get_features() + tfidf_fn
# Feature list for the price model: the prepared features plus the group features.
fl = features_to_use + group_features

# The regression target is the price, so the price columns themselves must not be inputs.
for price_feature in ('price', 'price_t', 'price_per_room'):
    fl.remove(price_feature)

fl.append('density_exp01')

In [9]:
# Sanity check: list any remaining feature whose name still mentions the price.
price_like = [f for f in fl if 'price' in f]
for name in price_like:
    print(name)

In [10]:
def run_to_stackdf(run):
    """Combine a run's CV predictions and averaged test predictions into one frame.

    ``run[1]`` must be a frame with ``level`` and ``listing_id`` columns and
    ``run[2]`` an array of per-fold test predictions, which is averaged over
    axis 0. The result is indexed by ``listing_id`` and sorted by it.

    NOTE(review): ``run_cv1`` in this notebook names its prediction column
    ``predicted_price`` rather than ``level`` - confirm which run format this
    helper is meant to receive.
    """
    df_testpreds = pd.DataFrame(run[2].mean(axis=0))
    df_testpreds.columns = ['level']
    # The predictions are in positional row order, so attach the ids positionally
    # (.values) instead of aligning on the test frame's index labels.
    df_testpreds['listing_id'] = cv_test[0].listing_id.values
    df_allpreds = pd.concat([run[1][['level', 'listing_id']], df_testpreds])

    return df_allpreds.sort_values('listing_id').set_index('listing_id')

In [11]:
def runXGB1(train_X, train_y, test_X, test_y=None, feature_names=None, seed_val=0, num_rounds=4000):
    """Train an xgboost regressor and predict on ``test_X``.

    When ``test_y`` is given, training monitors RMSE on the test set and stops
    after 50 rounds without improvement; otherwise it runs for ``num_rounds``
    rounds. ``feature_names`` is accepted but not used.

    Returns ``(pred_test_y, model)``.

    NOTE(review): 'reg:linear', 'silent' and ``ntree_limit`` /
    ``best_ntree_limit`` are legacy xgboost names - confirm they are supported
    by the installed version.
    """
    param = {
        'objective': 'reg:linear',
        #'tree_method': 'hist',
        'eta': 0.02,
        'max_depth': 8,
        'silent': 1,
        'num_class': 1,
        'eval_metric': "rmse",
        'min_child_weight': 1,
        'subsample': 0.7,
        'colsample_bytree': 0.7,
        'seed': seed_val,
        # Start boosting from the mean of the training target.
        'base_score': train_y.mean(),
    }

    plst = list(param.items())
    xgtrain = xgb.DMatrix(train_X, label=train_y)

    if test_y is None:
        xgtest = xgb.DMatrix(test_X)
        model = xgb.train(plst, xgtrain, num_rounds)
    else:
        xgtest = xgb.DMatrix(test_X, label=test_y)
        # The last watchlist entry ('test') is the one used for early stopping.
        watchlist = [ (xgtrain,'train'), (xgtest, 'test') ]
        model = xgb.train(plst, xgtrain, num_rounds, watchlist, early_stopping_rounds=50, verbose_eval=10)

    pred_test_y = model.predict(xgtest, ntree_limit=model.best_ntree_limit)
    return pred_test_y, model

In [12]:
def run_cv1(train_df, cv_test, kf, features_to_use):
    """Cross-validate the price regressor trained on ``interest_cat == 1`` rows.

    For every fold of ``kf`` a model is trained on the training rows with
    ``interest_cat == 1`` (presumably medium interest - confirm against the
    data prep), early-stopped on the validation rows with ``interest_cat == 1``,
    and then used to predict the price of *all* validation rows and of the
    fold's test frame ``cv_test[fold]``.

    Parameters
    ----------
    train_df : training frame; must contain ``features_to_use`` plus the
        ``price``, ``interest``, ``interest_cat`` and ``listing_id`` columns.
    cv_test : sequence of test frames, one per fold.
    kf : splitter exposing ``split(X, y)``.
    features_to_use : list of feature column names.

    Returns
    -------
    (models, df_cv, apreds) : the fitted models, the concatenated out-of-fold
        predictions, and an array of the per-fold test predictions.
    """
    cv_preds = []
    cv_scores = []
    models = []
    test_preds = []

    for fold, (dev_index, val_index) in enumerate(kf.split(train_df, train_df.interest_cat)):

        # kf.split yields positional indices, so rows are selected with iloc.
        cv_cur_train = train_df.iloc[dev_index]
        cv_cur_train = cv_cur_train[cv_cur_train.interest_cat == 1]

        cv_cur_valid = train_df.iloc[val_index]
        cv_cur_valid_cut = cv_cur_valid[cv_cur_valid.interest_cat == 1]

        dev_X, val_X = cv_cur_train[features_to_use], cv_cur_valid_cut[features_to_use]
        dev_y, val_y = cv_cur_train['price'], cv_cur_valid_cut['price']

        _, model = runXGB1(dev_X, dev_y, val_X, val_y)
        models.append(model)

        cv_scores.append(model.best_score)
        print(cv_scores)

        # Score every validation row, not only the interest_cat == 1 subset used for early stopping.
        fullpreds = model.predict(xgb.DMatrix(cv_cur_valid[features_to_use]), ntree_limit=model.best_ntree_limit)

        out_df = pd.DataFrame(fullpreds)
        out_df.columns = ["predicted_price"]
        out_df["listing_id"] = cv_cur_valid.listing_id.values
        out_df['interest_tgt'] = cv_cur_valid.interest.values
        out_df['interest_cat'] = cv_cur_valid.interest_cat.values
        out_df['price'] = cv_cur_valid.price.values

        cv_preds.append(out_df)

        # Each fold's model predicts on the test frame built for that fold.
        xgtest = xgb.DMatrix(cv_test[fold][features_to_use])
        test_preds.append(model.predict(xgtest, ntree_limit=model.best_ntree_limit))

    df_cv = pd.concat(cv_preds)
    apreds = np.array(test_preds)

    return models, df_cv, apreds

In [13]:
# Seeded 5-fold split for the price model.
# NOTE(review): the group features above were built with StratifiedKFold, this run uses a
# plain KFold, so the two fold assignments presumably differ - confirm that this is intended.
kfold = model_selection.KFold(5, shuffle=True, random_state=2016)
rv1 = run_cv1(train_df=train_df, cv_test=cv_test, kf=kfold, features_to_use=fl)


[0]	train-rmse:1213.05	test-rmse:1284.81
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
[10]	train-rmse:1051.72	test-rmse:1127.84
[20]	train-rmse:924.454	test-rmse:1010.26
[30]	train-rmse:804.561	test-rmse:897.988
[40]	train-rmse:711.279	test-rmse:812.905
[50]	train-rmse:635.799	test-rmse:747.443
[60]	train-rmse:576.518	test-rmse:697.743
[70]	train-rmse:525.244	test-rmse:656.018
[80]	train-rmse:484.793	test-rmse:626.959
[90]	train-rmse:447.884	test-rmse:600.06
[100]	train-rmse:418.891	test-rmse:579.669
[110]	train-rmse:395.049	test-rmse:564.876
[120]	train-rmse:373.166	test-rmse:551.338
[130]	train-rmse:354.644	test-rmse:539.967
[140]	train-rmse:338.717	test-rmse:530.607
[150]	train-rmse:325.098	test-rmse:523.166
[160]	train-rmse:312.994	test-rmse:517.954
[170]	train-rmse:302.235	test-rmse:512.991
[180]	train-rmse:293.054	test-rmse:509.003
[190]	train-rmse:284.417	test-rmse:505.015
[200]	train-rmse:277.277	test-rmse:502.876
[210]	train-rmse:270.375	test-rmse:500.521
[220]	train-rmse:263.66	test-rmse:498.433
[230]	train-rmse:258.086	test-rmse:497.194
[240]	train-rmse:252.117	test-rmse:495.608
[250]	train-rmse:247.35	test-rmse:494.463
[260]	train-rmse:242.838	test-rmse:493.426
[270]	train-rmse:238.465	test-rmse:492.297
[280]	train-rmse:234.724	test-rmse:491.716
[290]	train-rmse:230.511	test-rmse:490.661
[300]	train-rmse:227.156	test-rmse:489.92
[310]	train-rmse:222.889	test-rmse:489.509
[320]	train-rmse:220.009	test-rmse:489.155
[330]	train-rmse:216.752	test-rmse:488.397
[340]	train-rmse:213.843	test-rmse:487.772
[350]	train-rmse:210.611	test-rmse:487.334
[360]	train-rmse:207.544	test-rmse:486.555
[370]	train-rmse:204.098	test-rmse:486.006
[380]	train-rmse:201.548	test-rmse:485.472
[390]	train-rmse:198.524	test-rmse:485.179
[400]	train-rmse:195.981	test-rmse:484.996
[410]	train-rmse:192.812	test-rmse:484.604
[420]	train-rmse:190.098	test-rmse:484.326
[430]	train-rmse:188.046	test-rmse:484.006
[440]	train-rmse:185.634	test-rmse:483.74
[450]	train-rmse:183.329	test-rmse:483.388
[460]	train-rmse:181.022	test-rmse:483.258
[470]	train-rmse:178.57	test-rmse:483.061
[480]	train-rmse:176.351	test-rmse:482.786
[490]	train-rmse:173.954	test-rmse:482.519
[500]	train-rmse:172.07	test-rmse:482.28
[510]	train-rmse:169.91	test-rmse:482.22
[520]	train-rmse:167.772	test-rmse:481.814
[530]	train-rmse:165.437	test-rmse:481.468
[540]	train-rmse:163.029	test-rmse:481.312
[550]	train-rmse:161.064	test-rmse:481.277
[560]	train-rmse:159.333	test-rmse:481.071
[570]	train-rmse:157.155	test-rmse:480.931
[580]	train-rmse:154.959	test-rmse:480.686
[590]	train-rmse:152.849	test-rmse:480.462
[600]	train-rmse:150.994	test-rmse:480.372
[610]	train-rmse:149.14	test-rmse:480.16
[620]	train-rmse:147.276	test-rmse:480.133
[630]	train-rmse:145.448	test-rmse:480.138
[640]	train-rmse:143.66	test-rmse:479.966
[650]	train-rmse:141.973	test-rmse:479.726
[660]	train-rmse:140.438	test-rmse:479.521
[670]	train-rmse:138.67	test-rmse:479.316
[680]	train-rmse:136.857	test-rmse:479.226
[690]	train-rmse:135.2	test-rmse:479.062
[700]	train-rmse:133.652	test-rmse:478.998
[710]	train-rmse:132.009	test-rmse:478.854
[720]	train-rmse:130.487	test-rmse:478.78
[730]	train-rmse:128.918	test-rmse:478.62
[740]	train-rmse:127.678	test-rmse:478.545
[750]	train-rmse:125.819	test-rmse:478.384
[760]	train-rmse:124.266	test-rmse:478.28
[770]	train-rmse:122.553	test-rmse:478.248
[780]	train-rmse:121.343	test-rmse:478.034
[790]	train-rmse:120.267	test-rmse:477.946
[800]	train-rmse:118.558	test-rmse:477.942
[810]	train-rmse:117.025	test-rmse:477.846
[820]	train-rmse:115.793	test-rmse:477.78
[830]	train-rmse:114.587	test-rmse:477.712
[840]	train-rmse:113.387	test-rmse:477.684
[850]	train-rmse:112.076	test-rmse:477.612
[860]	train-rmse:110.892	test-rmse:477.597
[870]	train-rmse:109.485	test-rmse:477.503
[880]	train-rmse:108.057	test-rmse:477.409
[890]	train-rmse:106.783	test-rmse:477.298
[900]	train-rmse:105.811	test-rmse:477.297
[910]	train-rmse:104.602	test-rmse:477.171
[920]	train-rmse:103.311	test-rmse:477.064
[930]	train-rmse:102.151	test-rmse:476.999
[940]	train-rmse:100.851	test-rmse:476.971
[950]	train-rmse:99.6915	test-rmse:476.868
[960]	train-rmse:98.6683	test-rmse:476.851
[970]	train-rmse:97.4892	test-rmse:476.881
[980]	train-rmse:96.2649	test-rmse:476.867
[990]	train-rmse:95.2792	test-rmse:476.855
[1000]	train-rmse:94.1789	test-rmse:476.742
[1010]	train-rmse:93.0124	test-rmse:476.676
[1020]	train-rmse:91.9001	test-rmse:476.568
[1030]	train-rmse:90.9944	test-rmse:476.561
[1040]	train-rmse:89.8657	test-rmse:476.59
[1050]	train-rmse:88.8762	test-rmse:476.601
[1060]	train-rmse:87.7753	test-rmse:476.586
[1070]	train-rmse:86.8571	test-rmse:476.515
[1080]	train-rmse:85.6825	test-rmse:476.521
[1090]	train-rmse:84.562	test-rmse:476.444
[1100]	train-rmse:83.4514	test-rmse:476.405
[1110]	train-rmse:82.3733	test-rmse:476.438
[1120]	train-rmse:81.6077	test-rmse:476.422
[1130]	train-rmse:80.5827	test-rmse:476.378
[1140]	train-rmse:79.6431	test-rmse:476.334
[1150]	train-rmse:78.8449	test-rmse:476.292
[1160]	train-rmse:78.0604	test-rmse:476.317
[1170]	train-rmse:77.0835	test-rmse:476.232
[1180]	train-rmse:76.189	test-rmse:476.212
[1190]	train-rmse:75.4969	test-rmse:476.183
[1200]	train-rmse:74.6407	test-rmse:476.099
[1210]	train-rmse:73.8245	test-rmse:476.079
[1220]	train-rmse:72.9986	test-rmse:476.037
[1230]	train-rmse:72.3042	test-rmse:475.959
[1240]	train-rmse:71.4138	test-rmse:475.934
[1250]	train-rmse:70.6351	test-rmse:475.929
[1260]	train-rmse:70.064	test-rmse:475.893
[1270]	train-rmse:69.2299	test-rmse:475.817
[1280]	train-rmse:68.3732	test-rmse:475.854
[1290]	train-rmse:67.569	test-rmse:475.826
[1300]	train-rmse:66.8441	test-rmse:475.79
[1310]	train-rmse:66.1295	test-rmse:475.762
[1320]	train-rmse:65.4545	test-rmse:475.719
[1330]	train-rmse:64.7191	test-rmse:475.68
[1340]	train-rmse:64.0691	test-rmse:475.68
[1350]	train-rmse:63.4297	test-rmse:475.655
[1360]	train-rmse:62.628	test-rmse:475.561
[1370]	train-rmse:61.9509	test-rmse:475.509
[1380]	train-rmse:61.3338	test-rmse:475.464
[1390]	train-rmse:60.6704	test-rmse:475.434
[1400]	train-rmse:60.0192	test-rmse:475.36
[1410]	train-rmse:59.3156	test-rmse:475.314
[1420]	train-rmse:58.711	test-rmse:475.316
[1430]	train-rmse:58.0369	test-rmse:475.306
[1440]	train-rmse:57.4924	test-rmse:475.247
[1450]	train-rmse:56.845	test-rmse:475.246
[1460]	train-rmse:56.2871	test-rmse:475.234
[1470]	train-rmse:55.7393	test-rmse:475.198
[1480]	train-rmse:55.139	test-rmse:475.213
[1490]	train-rmse:54.5358	test-rmse:475.206
[1500]	train-rmse:53.9779	test-rmse:475.181
[1510]	train-rmse:53.3852	test-rmse:475.16
[1520]	train-rmse:52.7674	test-rmse:475.099
[1530]	train-rmse:52.0161	test-rmse:475.101
[1540]	train-rmse:51.4975	test-rmse:475.09
[1550]	train-rmse:50.9187	test-rmse:475.087
[1560]	train-rmse:50.3481	test-rmse:475.066
[1570]	train-rmse:49.8653	test-rmse:475.044
[1580]	train-rmse:49.3394	test-rmse:475.041
[1590]	train-rmse:48.6837	test-rmse:475.043
[1600]	train-rmse:48.1167	test-rmse:475.03
[1610]	train-rmse:47.6452	test-rmse:475.06
[1620]	train-rmse:47.1741	test-rmse:475.067
[1630]	train-rmse:46.7221	test-rmse:475.05
[1640]	train-rmse:46.1862	test-rmse:475.015
[1650]	train-rmse:45.7718	test-rmse:475.045
[1660]	train-rmse:45.3099	test-rmse:475.015
[1670]	train-rmse:44.81	test-rmse:475.059
[1680]	train-rmse:44.3215	test-rmse:475.043
[1690]	train-rmse:43.9192	test-rmse:475.01
[1700]	train-rmse:43.444	test-rmse:474.999
[1710]	train-rmse:42.9495	test-rmse:475.021
[1720]	train-rmse:42.5485	test-rmse:475.004
[1730]	train-rmse:42.1202	test-rmse:474.994
[1740]	train-rmse:41.673	test-rmse:474.989
[1750]	train-rmse:41.2513	test-rmse:474.969
[1760]	train-rmse:40.8384	test-rmse:474.959
[1770]	train-rmse:40.3599	test-rmse:474.961
[1780]	train-rmse:39.9016	test-rmse:474.941
[1790]	train-rmse:39.5008	test-rmse:474.942
[1800]	train-rmse:39.1676	test-rmse:474.957
[1810]	train-rmse:38.7864	test-rmse:474.927
[1820]	train-rmse:38.3646	test-rmse:474.934
[1830]	train-rmse:37.9505	test-rmse:474.906
[1840]	train-rmse:37.5823	test-rmse:474.886
[1850]	train-rmse:37.089	test-rmse:474.874
[1860]	train-rmse:36.7253	test-rmse:474.852
[1870]	train-rmse:36.3881	test-rmse:474.872
[1880]	train-rmse:36.0308	test-rmse:474.885
[1890]	train-rmse:35.6055	test-rmse:474.898
[1900]	train-rmse:35.2575	test-rmse:474.877
[1910]	train-rmse:34.8198	test-rmse:474.844
[1920]	train-rmse:34.4222	test-rmse:474.833
[1930]	train-rmse:34.1145	test-rmse:474.836
[1940]	train-rmse:33.6991	test-rmse:474.849
[1950]	train-rmse:33.3645	test-rmse:474.82
[1960]	train-rmse:32.9688	test-rmse:474.796
[1970]	train-rmse:32.6491	test-rmse:474.803
[1980]	train-rmse:32.3281	test-rmse:474.804
[1990]	train-rmse:31.9939	test-rmse:474.791
[2000]	train-rmse:31.6919	test-rmse:474.785
[2010]	train-rmse:31.4032	test-rmse:474.801
[2020]	train-rmse:31.1147	test-rmse:474.814
[2030]	train-rmse:30.8213	test-rmse:474.8
[2040]	train-rmse:30.4573	test-rmse:474.804
Stopping. Best iteration:
[1999]	train-rmse:31.725	test-rmse:474.777

[474.776733]
[0]	train-rmse:1236.43	test-rmse:1144.97
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
[10]	train-rmse:1063.46	test-rmse:985.133
[20]	train-rmse:943.312	test-rmse:881.163
[30]	train-rmse:827.318	test-rmse:780.529
[40]	train-rmse:734.551	test-rmse:702.661
[50]	train-rmse:656.892	test-rmse:639.303
[60]	train-rmse:590.963	test-rmse:588.435
[70]	train-rmse:536.77	test-rmse:548.761
[80]	train-rmse:494.537	test-rmse:520.366
[90]	train-rmse:458.417	test-rmse:497.291
[100]	train-rmse:426.832	test-rmse:478.521
[110]	train-rmse:400.758	test-rmse:465.034
[120]	train-rmse:378.341	test-rmse:454.239
[130]	train-rmse:359.114	test-rmse:446.319
[140]	train-rmse:343.431	test-rmse:441.059
[150]	train-rmse:329.754	test-rmse:435.941
[160]	train-rmse:317.447	test-rmse:432.042
[170]	train-rmse:306.792	test-rmse:428.822
[180]	train-rmse:297.59	test-rmse:426.226
[190]	train-rmse:289.128	test-rmse:423.671
[200]	train-rmse:281.081	test-rmse:421.437
[210]	train-rmse:274.198	test-rmse:419.622
[220]	train-rmse:267.375	test-rmse:417.959
[230]	train-rmse:260.969	test-rmse:416.271
[240]	train-rmse:255.461	test-rmse:415.182
[250]	train-rmse:250.471	test-rmse:414.429
[260]	train-rmse:245.601	test-rmse:413.29
[270]	train-rmse:241.067	test-rmse:412.53
[280]	train-rmse:237.307	test-rmse:411.807
[290]	train-rmse:233.692	test-rmse:410.883
[300]	train-rmse:229.189	test-rmse:409.946
[310]	train-rmse:225.584	test-rmse:409.356
[320]	train-rmse:222.103	test-rmse:408.72
[330]	train-rmse:219.226	test-rmse:408.114
[340]	train-rmse:216.194	test-rmse:407.546
[350]	train-rmse:212.677	test-rmse:407.025
[360]	train-rmse:209.81	test-rmse:406.715
[370]	train-rmse:207.227	test-rmse:406.347
[380]	train-rmse:204.101	test-rmse:405.598
[390]	train-rmse:201.13	test-rmse:405.343
[400]	train-rmse:197.675	test-rmse:404.788
[410]	train-rmse:195.355	test-rmse:404.381
[420]	train-rmse:192.851	test-rmse:404.046
[430]	train-rmse:190.163	test-rmse:403.846
[440]	train-rmse:187.767	test-rmse:403.454
[450]	train-rmse:185.557	test-rmse:403.037
[460]	train-rmse:182.937	test-rmse:402.784
[470]	train-rmse:180.772	test-rmse:402.457
[480]	train-rmse:178.219	test-rmse:402.184
[490]	train-rmse:175.435	test-rmse:401.828
[500]	train-rmse:173.632	test-rmse:401.5
[510]	train-rmse:171.288	test-rmse:401.329
[520]	train-rmse:169.348	test-rmse:401.311
[530]	train-rmse:166.981	test-rmse:401.216
[540]	train-rmse:164.71	test-rmse:401.048
[550]	train-rmse:162.537	test-rmse:400.777
[560]	train-rmse:160.586	test-rmse:400.577
[570]	train-rmse:158.128	test-rmse:400.16
[580]	train-rmse:156.233	test-rmse:400.072
[590]	train-rmse:154.49	test-rmse:399.903
[600]	train-rmse:152.603	test-rmse:399.857
[610]	train-rmse:150.626	test-rmse:399.717
[620]	train-rmse:148.892	test-rmse:399.606
[630]	train-rmse:147.096	test-rmse:399.596
[640]	train-rmse:145.024	test-rmse:399.41
[650]	train-rmse:142.932	test-rmse:399.141
[660]	train-rmse:141.509	test-rmse:399.079
[670]	train-rmse:139.308	test-rmse:398.782
[680]	train-rmse:137.982	test-rmse:398.604
[690]	train-rmse:136.337	test-rmse:398.454
[700]	train-rmse:134.972	test-rmse:398.473
[710]	train-rmse:133.322	test-rmse:398.273
[720]	train-rmse:131.65	test-rmse:398.088
[730]	train-rmse:130.048	test-rmse:397.916
[740]	train-rmse:128.574	test-rmse:397.801
[750]	train-rmse:127.216	test-rmse:397.719
[760]	train-rmse:125.584	test-rmse:397.574
[770]	train-rmse:124.032	test-rmse:397.551
[780]	train-rmse:122.804	test-rmse:397.4
[790]	train-rmse:121.235	test-rmse:397.334
[800]	train-rmse:119.861	test-rmse:397.237
[810]	train-rmse:118.372	test-rmse:397.095
[820]	train-rmse:117.043	test-rmse:397.008
[830]	train-rmse:115.659	test-rmse:396.903
[840]	train-rmse:114.372	test-rmse:396.821
[850]	train-rmse:113.167	test-rmse:396.772
[860]	train-rmse:111.893	test-rmse:396.699
[870]	train-rmse:110.782	test-rmse:396.673
[880]	train-rmse:109.446	test-rmse:396.581
[890]	train-rmse:108.371	test-rmse:396.587
[900]	train-rmse:107.095	test-rmse:396.579
[910]	train-rmse:105.915	test-rmse:396.475
[920]	train-rmse:104.729	test-rmse:396.423
[930]	train-rmse:103.378	test-rmse:396.342
[940]	train-rmse:102.315	test-rmse:396.312
[950]	train-rmse:101.138	test-rmse:396.334
[960]	train-rmse:99.9691	test-rmse:396.22
[970]	train-rmse:98.9173	test-rmse:396.135
[980]	train-rmse:97.8278	test-rmse:396.084
[990]	train-rmse:96.733	test-rmse:395.979
[1000]	train-rmse:95.5546	test-rmse:395.932
[1010]	train-rmse:94.5416	test-rmse:395.859
[1020]	train-rmse:93.5322	test-rmse:395.84
[1030]	train-rmse:92.5627	test-rmse:395.777
[1040]	train-rmse:91.632	test-rmse:395.73
[1050]	train-rmse:90.7065	test-rmse:395.688
[1060]	train-rmse:89.6513	test-rmse:395.683
[1070]	train-rmse:88.7576	test-rmse:395.64
[1080]	train-rmse:87.7975	test-rmse:395.608
[1090]	train-rmse:86.7778	test-rmse:395.624
[1100]	train-rmse:85.791	test-rmse:395.558
[1110]	train-rmse:84.8209	test-rmse:395.491
[1120]	train-rmse:84.072	test-rmse:395.477
[1130]	train-rmse:83.1907	test-rmse:395.406
[1140]	train-rmse:82.2927	test-rmse:395.401
[1150]	train-rmse:81.3624	test-rmse:395.328
[1160]	train-rmse:80.5873	test-rmse:395.326
[1170]	train-rmse:79.6714	test-rmse:395.243
[1180]	train-rmse:78.7796	test-rmse:395.265
[1190]	train-rmse:77.9533	test-rmse:395.27
[1200]	train-rmse:77.0779	test-rmse:395.223
[1210]	train-rmse:76.3216	test-rmse:395.247
[1220]	train-rmse:75.4878	test-rmse:395.217
[1230]	train-rmse:74.7564	test-rmse:395.206
[1240]	train-rmse:73.9597	test-rmse:395.186
[1250]	train-rmse:73.186	test-rmse:395.174
[1260]	train-rmse:72.3642	test-rmse:395.192
[1270]	train-rmse:71.5474	test-rmse:395.195
[1280]	train-rmse:70.7586	test-rmse:395.141
[1290]	train-rmse:69.9707	test-rmse:395.129
[1300]	train-rmse:69.2691	test-rmse:395.085
[1310]	train-rmse:68.4216	test-rmse:395.038
[1320]	train-rmse:67.6029	test-rmse:395.041
[1330]	train-rmse:66.8725	test-rmse:394.992
[1340]	train-rmse:66.1446	test-rmse:394.947
[1350]	train-rmse:65.3928	test-rmse:394.893
[1360]	train-rmse:64.6351	test-rmse:394.841
[1370]	train-rmse:63.8082	test-rmse:394.758
[1380]	train-rmse:63.1021	test-rmse:394.704
[1390]	train-rmse:62.482	test-rmse:394.678
[1400]	train-rmse:61.7493	test-rmse:394.66
[1410]	train-rmse:61.0604	test-rmse:394.654
[1420]	train-rmse:60.4354	test-rmse:394.638
[1430]	train-rmse:59.8653	test-rmse:394.539
[1440]	train-rmse:59.252	test-rmse:394.515
[1450]	train-rmse:58.5836	test-rmse:394.525
[1460]	train-rmse:57.891	test-rmse:394.49
[1470]	train-rmse:57.2005	test-rmse:394.477
[1480]	train-rmse:56.6568	test-rmse:394.452
[1490]	train-rmse:56.0932	test-rmse:394.418
[1500]	train-rmse:55.4252	test-rmse:394.39
[1510]	train-rmse:54.7605	test-rmse:394.389
[1520]	train-rmse:54.2345	test-rmse:394.387
[1530]	train-rmse:53.6443	test-rmse:394.374
[1540]	train-rmse:53.1337	test-rmse:394.362
[1550]	train-rmse:52.6577	test-rmse:394.336
[1560]	train-rmse:52.2306	test-rmse:394.384
[1570]	train-rmse:51.7505	test-rmse:394.363
[1580]	train-rmse:51.2197	test-rmse:394.367
[1590]	train-rmse:50.7761	test-rmse:394.372
Stopping. Best iteration:
[1548]	train-rmse:52.7044	test-rmse:394.336

[474.776733, 394.336029]
[0]	train-rmse:1212.14	test-rmse:1290.42
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
[10]	train-rmse:1052.14	test-rmse:1140.87
[20]	train-rmse:911.018	test-rmse:1006.11
[30]	train-rmse:793.425	test-rmse:898.308
[40]	train-rmse:705.072	test-rmse:819.624
[50]	train-rmse:631.223	test-rmse:753.54
[60]	train-rmse:568.202	test-rmse:698.96
[70]	train-rmse:515.526	test-rmse:654.488
[80]	train-rmse:473.386	test-rmse:619.649
[90]	train-rmse:440.581	test-rmse:595.791
[100]	train-rmse:412.557	test-rmse:576.916
[110]	train-rmse:389.959	test-rmse:562.277
[120]	train-rmse:368.974	test-rmse:547.589
[130]	train-rmse:349.105	test-rmse:534.928
[140]	train-rmse:334.516	test-rmse:526.299
[150]	train-rmse:320.33	test-rmse:518.513
[160]	train-rmse:308.393	test-rmse:512.656
[170]	train-rmse:298.091	test-rmse:508.041
[180]	train-rmse:288.686	test-rmse:503.881
[190]	train-rmse:279.906	test-rmse:500.095
[200]	train-rmse:272.082	test-rmse:496.506
[210]	train-rmse:265.203	test-rmse:493.684
[220]	train-rmse:259.091	test-rmse:491.314
[230]	train-rmse:252.686	test-rmse:489.107
[240]	train-rmse:247.638	test-rmse:487.621
[250]	train-rmse:242.859	test-rmse:486.295
[260]	train-rmse:238.503	test-rmse:484.973
[270]	train-rmse:234.679	test-rmse:483.584
[280]	train-rmse:230.571	test-rmse:482.232
[290]	train-rmse:226.527	test-rmse:481.265
[300]	train-rmse:223.273	test-rmse:480.641
[310]	train-rmse:219.186	test-rmse:479.567
[320]	train-rmse:215.982	test-rmse:478.959
[330]	train-rmse:212.759	test-rmse:478.168
[340]	train-rmse:209.641	test-rmse:477.63
[350]	train-rmse:206.906	test-rmse:477.019
[360]	train-rmse:204.303	test-rmse:476.489
[370]	train-rmse:200.656	test-rmse:475.897
[380]	train-rmse:197.47	test-rmse:475.304
[390]	train-rmse:194.731	test-rmse:474.789
[400]	train-rmse:192.306	test-rmse:474.405
[410]	train-rmse:189.749	test-rmse:473.999
[420]	train-rmse:186.988	test-rmse:473.641
[430]	train-rmse:184.316	test-rmse:473.069
[440]	train-rmse:182.471	test-rmse:472.832
[450]	train-rmse:180.396	test-rmse:472.569
[460]	train-rmse:178.137	test-rmse:472.285
[470]	train-rmse:175.814	test-rmse:472.02
[480]	train-rmse:173.829	test-rmse:471.814
[490]	train-rmse:171.578	test-rmse:471.57
[500]	train-rmse:168.993	test-rmse:471.279
[510]	train-rmse:166.919	test-rmse:471.034
[520]	train-rmse:164.639	test-rmse:470.859
[530]	train-rmse:162.364	test-rmse:470.663
[540]	train-rmse:160.219	test-rmse:470.457
[550]	train-rmse:157.912	test-rmse:470.201
[560]	train-rmse:156.081	test-rmse:470.059
[570]	train-rmse:154.365	test-rmse:469.919
[580]	train-rmse:152.799	test-rmse:469.907
[590]	train-rmse:151.326	test-rmse:469.795
[600]	train-rmse:149.525	test-rmse:469.569
[610]	train-rmse:148.112	test-rmse:469.46
[620]	train-rmse:146.18	test-rmse:469.441
[630]	train-rmse:144.21	test-rmse:469.172
[640]	train-rmse:142.368	test-rmse:469.03
[650]	train-rmse:140.531	test-rmse:468.765
[660]	train-rmse:138.827	test-rmse:468.734
[670]	train-rmse:137.172	test-rmse:468.697
[680]	train-rmse:135.237	test-rmse:468.603
[690]	train-rmse:133.561	test-rmse:468.472
[700]	train-rmse:132.082	test-rmse:468.43
[710]	train-rmse:130.38	test-rmse:468.276
[720]	train-rmse:129.018	test-rmse:468.222
[730]	train-rmse:127.518	test-rmse:468.141
[740]	train-rmse:126.18	test-rmse:467.99
[750]	train-rmse:124.81	test-rmse:467.808
[760]	train-rmse:123.471	test-rmse:467.657
[770]	train-rmse:122.113	test-rmse:467.608
[780]	train-rmse:120.665	test-rmse:467.49
[790]	train-rmse:119.549	test-rmse:467.376
[800]	train-rmse:118.191	test-rmse:467.225
[810]	train-rmse:116.563	test-rmse:467.303
[820]	train-rmse:115.33	test-rmse:467.286
[830]	train-rmse:113.807	test-rmse:467.161
[840]	train-rmse:112.602	test-rmse:467.132
[850]	train-rmse:111.211	test-rmse:467.034
[860]	train-rmse:109.954	test-rmse:466.972
[870]	train-rmse:108.927	test-rmse:466.967
[880]	train-rmse:107.553	test-rmse:466.85
[890]	train-rmse:106.406	test-rmse:466.745
[900]	train-rmse:105.153	test-rmse:466.734
[910]	train-rmse:104.125	test-rmse:466.632
[920]	train-rmse:103.16	test-rmse:466.641
[930]	train-rmse:102.026	test-rmse:466.694
[940]	train-rmse:100.601	test-rmse:466.57
[950]	train-rmse:99.4201	test-rmse:466.533
[960]	train-rmse:98.1316	test-rmse:466.425
[970]	train-rmse:96.7797	test-rmse:466.282
[980]	train-rmse:95.5996	test-rmse:466.257
[990]	train-rmse:94.5604	test-rmse:466.196
[1000]	train-rmse:93.6059	test-rmse:466.2
[1010]	train-rmse:92.4144	test-rmse:466.18
[1020]	train-rmse:91.2113	test-rmse:466.048
[1030]	train-rmse:90.3653	test-rmse:466.001
[1040]	train-rmse:89.0559	test-rmse:465.958
[1050]	train-rmse:87.9766	test-rmse:465.873
[1060]	train-rmse:86.9341	test-rmse:465.867
[1070]	train-rmse:85.8421	test-rmse:465.826
[1080]	train-rmse:84.7859	test-rmse:465.829
[1090]	train-rmse:83.8317	test-rmse:465.754
[1100]	train-rmse:82.7952	test-rmse:465.697
[1110]	train-rmse:81.9175	test-rmse:465.673
[1120]	train-rmse:81.0885	test-rmse:465.627
[1130]	train-rmse:80.1341	test-rmse:465.653
[1140]	train-rmse:79.1043	test-rmse:465.611
[1150]	train-rmse:78.3826	test-rmse:465.61
[1160]	train-rmse:77.4457	test-rmse:465.544
[1170]	train-rmse:76.5519	test-rmse:465.542
[1180]	train-rmse:75.7287	test-rmse:465.562
[1190]	train-rmse:74.9452	test-rmse:465.575
[1200]	train-rmse:74.0958	test-rmse:465.567
[1210]	train-rmse:73.2508	test-rmse:465.505
[1220]	train-rmse:72.4175	test-rmse:465.453
[1230]	train-rmse:71.587	test-rmse:465.445
[1240]	train-rmse:70.6188	test-rmse:465.464
[1250]	train-rmse:69.7721	test-rmse:465.427
[1260]	train-rmse:69.0343	test-rmse:465.353
[1270]	train-rmse:68.3022	test-rmse:465.331
[1280]	train-rmse:67.4857	test-rmse:465.285
[1290]	train-rmse:66.7692	test-rmse:465.236
[1300]	train-rmse:65.8989	test-rmse:465.184
[1310]	train-rmse:65.09	test-rmse:465.147
[1320]	train-rmse:64.3398	test-rmse:465.178
[1330]	train-rmse:63.6281	test-rmse:465.175
[1340]	train-rmse:63.1028	test-rmse:465.157
[1350]	train-rmse:62.4543	test-rmse:465.145
[1360]	train-rmse:61.7849	test-rmse:465.115
[1370]	train-rmse:61.0985	test-rmse:465.116
[1380]	train-rmse:60.4981	test-rmse:465.123
[1390]	train-rmse:59.984	test-rmse:465.052
[1400]	train-rmse:59.3483	test-rmse:465.039
[1410]	train-rmse:58.7629	test-rmse:465.034
[1420]	train-rmse:58.0946	test-rmse:465.002
[1430]	train-rmse:57.4618	test-rmse:464.945
[1440]	train-rmse:56.6893	test-rmse:464.966
[1450]	train-rmse:55.9963	test-rmse:464.927
[1460]	train-rmse:55.4932	test-rmse:464.884
[1470]	train-rmse:54.8629	test-rmse:464.873
[1480]	train-rmse:54.1698	test-rmse:464.874
[1490]	train-rmse:53.6338	test-rmse:464.902
[1500]	train-rmse:53.0382	test-rmse:464.899
[1510]	train-rmse:52.5427	test-rmse:464.924
[1520]	train-rmse:51.9787	test-rmse:464.947
Stopping. Best iteration:
[1479]	train-rmse:54.2713	test-rmse:464.85

[474.776733, 394.336029, 464.849518]
[0]	train-rmse:1239.03	test-rmse:1180.17
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
[10]	train-rmse:1065.99	test-rmse:1017.24
[20]	train-rmse:922.866	test-rmse:883.003
[30]	train-rmse:810.292	test-rmse:780.283
[40]	train-rmse:721.357	test-rmse:702.036
[50]	train-rmse:647.448	test-rmse:640.501
[60]	train-rmse:583.016	test-rmse:587.096
[70]	train-rmse:529.547	test-rmse:544.747
[80]	train-rmse:487.491	test-rmse:514.883
[90]	train-rmse:452.393	test-rmse:492.143
[100]	train-rmse:423.656	test-rmse:475.033
[110]	train-rmse:398.832	test-rmse:461.883
[120]	train-rmse:376.716	test-rmse:451.327
[130]	train-rmse:358.893	test-rmse:443.544
[140]	train-rmse:342.822	test-rmse:437.075
[150]	train-rmse:329.391	test-rmse:432.446
[160]	train-rmse:317.356	test-rmse:428.939
[170]	train-rmse:306.458	test-rmse:425.844
[180]	train-rmse:297.067	test-rmse:422.62
[190]	train-rmse:288.917	test-rmse:420.518
[200]	train-rmse:281.212	test-rmse:418.877
[210]	train-rmse:274.136	test-rmse:417.525
[220]	train-rmse:267.699	test-rmse:416.312
[230]	train-rmse:261.418	test-rmse:415.483
[240]	train-rmse:255.903	test-rmse:414.922
[250]	train-rmse:250.623	test-rmse:413.934
[260]	train-rmse:246.211	test-rmse:413.265
[270]	train-rmse:241.872	test-rmse:412.797
[280]	train-rmse:237.886	test-rmse:411.768
[290]	train-rmse:233.559	test-rmse:410.915
[300]	train-rmse:230.073	test-rmse:410.263
[310]	train-rmse:226.351	test-rmse:409.764
[320]	train-rmse:223.278	test-rmse:409.332
[330]	train-rmse:219.541	test-rmse:408.626
[340]	train-rmse:216.099	test-rmse:408.128
[350]	train-rmse:213.14	test-rmse:407.806
[360]	train-rmse:209.75	test-rmse:407.41
[370]	train-rmse:206.992	test-rmse:407.378
[380]	train-rmse:204.557	test-rmse:406.961
[390]	train-rmse:202.023	test-rmse:406.584
[400]	train-rmse:199.08	test-rmse:406.168
[410]	train-rmse:196.095	test-rmse:405.826
[420]	train-rmse:193.812	test-rmse:405.541
[430]	train-rmse:191.092	test-rmse:405.097
[440]	train-rmse:188.813	test-rmse:404.873
[450]	train-rmse:186.649	test-rmse:404.606
[460]	train-rmse:184.538	test-rmse:404.395
[470]	train-rmse:181.958	test-rmse:404.269
[480]	train-rmse:179.629	test-rmse:403.973
[490]	train-rmse:177.591	test-rmse:403.822
[500]	train-rmse:175.109	test-rmse:403.695
[510]	train-rmse:172.626	test-rmse:403.474
[520]	train-rmse:170.735	test-rmse:403.252
[530]	train-rmse:168.922	test-rmse:403.122
[540]	train-rmse:167.191	test-rmse:403.104
[550]	train-rmse:165.517	test-rmse:403.002
[560]	train-rmse:163.485	test-rmse:402.537
[570]	train-rmse:161.598	test-rmse:402.322
[580]	train-rmse:159.946	test-rmse:402.137
[590]	train-rmse:157.902	test-rmse:401.912
[600]	train-rmse:156.398	test-rmse:401.734
[610]	train-rmse:154.27	test-rmse:401.628
[620]	train-rmse:152.459	test-rmse:401.401
[630]	train-rmse:151.338	test-rmse:401.324
[640]	train-rmse:149.645	test-rmse:401.126
[650]	train-rmse:148.206	test-rmse:401.128
[660]	train-rmse:146.408	test-rmse:401.095
[670]	train-rmse:144.795	test-rmse:401.032
[680]	train-rmse:142.98	test-rmse:400.884
[690]	train-rmse:141.389	test-rmse:400.726
[700]	train-rmse:139.793	test-rmse:400.538
[710]	train-rmse:138.278	test-rmse:400.393
[720]	train-rmse:136.601	test-rmse:400.238
[730]	train-rmse:135.118	test-rmse:400.1
[740]	train-rmse:133.295	test-rmse:399.89
[750]	train-rmse:131.881	test-rmse:399.775
[760]	train-rmse:130.324	test-rmse:399.664
[770]	train-rmse:128.839	test-rmse:399.602
[780]	train-rmse:127.098	test-rmse:399.521
[790]	train-rmse:125.905	test-rmse:399.485
[800]	train-rmse:124.325	test-rmse:399.455
[810]	train-rmse:122.849	test-rmse:399.342
[820]	train-rmse:121.238	test-rmse:399.211
[830]	train-rmse:119.911	test-rmse:399.107
[840]	train-rmse:118.518	test-rmse:398.908
[850]	train-rmse:117.154	test-rmse:398.767
[860]	train-rmse:115.856	test-rmse:398.71
[870]	train-rmse:114.504	test-rmse:398.71
[880]	train-rmse:113.282	test-rmse:398.542
[890]	train-rmse:111.99	test-rmse:398.485
[900]	train-rmse:110.691	test-rmse:398.459
[910]	train-rmse:109.405	test-rmse:398.302
[920]	train-rmse:107.885	test-rmse:398.194
[930]	train-rmse:106.536	test-rmse:398.033
[940]	train-rmse:105.476	test-rmse:397.941
[950]	train-rmse:104.277	test-rmse:397.852
[960]	train-rmse:103.311	test-rmse:397.746
[970]	train-rmse:102.378	test-rmse:397.779
[980]	train-rmse:101.218	test-rmse:397.671
[990]	train-rmse:100.035	test-rmse:397.608
[1000]	train-rmse:98.6698	test-rmse:397.597
[1010]	train-rmse:97.5166	test-rmse:397.504
[1020]	train-rmse:96.462	test-rmse:397.452
[1030]	train-rmse:95.6421	test-rmse:397.436
[1040]	train-rmse:94.4729	test-rmse:397.392
[1050]	train-rmse:93.5609	test-rmse:397.369
[1060]	train-rmse:92.4587	test-rmse:397.393
[1070]	train-rmse:91.6399	test-rmse:397.349
[1080]	train-rmse:90.7704	test-rmse:397.339
[1090]	train-rmse:89.7145	test-rmse:397.327
[1100]	train-rmse:88.6791	test-rmse:397.355
[1110]	train-rmse:87.7205	test-rmse:397.33
[1120]	train-rmse:86.9383	test-rmse:397.272
[1130]	train-rmse:85.9308	test-rmse:397.221
[1140]	train-rmse:84.9875	test-rmse:397.184
[1150]	train-rmse:83.9965	test-rmse:397.148
[1160]	train-rmse:83.1341	test-rmse:397.108
[1170]	train-rmse:82.335	test-rmse:397.077
[1180]	train-rmse:81.397	test-rmse:397.015
[1190]	train-rmse:80.4147	test-rmse:396.929
[1200]	train-rmse:79.4987	test-rmse:396.891
[1210]	train-rmse:78.5466	test-rmse:396.908
[1220]	train-rmse:77.6992	test-rmse:396.899
[1230]	train-rmse:76.9107	test-rmse:396.923
[1240]	train-rmse:76.0797	test-rmse:396.892
[1250]	train-rmse:75.3036	test-rmse:396.87
[1260]	train-rmse:74.4293	test-rmse:396.852
[1270]	train-rmse:73.5615	test-rmse:396.792
[1280]	train-rmse:72.6775	test-rmse:396.786
[1290]	train-rmse:71.9069	test-rmse:396.768
[1300]	train-rmse:71.0567	test-rmse:396.707
[1310]	train-rmse:70.134	test-rmse:396.643
[1320]	train-rmse:69.2857	test-rmse:396.626
[1330]	train-rmse:68.4195	test-rmse:396.663
[1340]	train-rmse:67.6335	test-rmse:396.619
[1350]	train-rmse:66.8859	test-rmse:396.605
[1360]	train-rmse:66.2552	test-rmse:396.591
[1370]	train-rmse:65.5851	test-rmse:396.551
[1380]	train-rmse:64.939	test-rmse:396.511
[1390]	train-rmse:64.2486	test-rmse:396.508
[1400]	train-rmse:63.5412	test-rmse:396.497
[1410]	train-rmse:62.8089	test-rmse:396.466
[1420]	train-rmse:62.1727	test-rmse:396.428
[1430]	train-rmse:61.466	test-rmse:396.368
[1440]	train-rmse:60.8759	test-rmse:396.324
[1450]	train-rmse:60.1785	test-rmse:396.296
[1460]	train-rmse:59.4991	test-rmse:396.29
[1470]	train-rmse:59.0082	test-rmse:396.273
[1480]	train-rmse:58.2652	test-rmse:396.279
[1490]	train-rmse:57.7558	test-rmse:396.278
[1500]	train-rmse:57.1933	test-rmse:396.231
[1510]	train-rmse:56.5733	test-rmse:396.253
[1520]	train-rmse:55.9551	test-rmse:396.233
[1530]	train-rmse:55.2779	test-rmse:396.208
[1540]	train-rmse:54.6735	test-rmse:396.168
[1550]	train-rmse:54.0565	test-rmse:396.17
[1560]	train-rmse:53.4444	test-rmse:396.152
[1570]	train-rmse:52.7738	test-rmse:396.148
[1580]	train-rmse:52.2407	test-rmse:396.142
[1590]	train-rmse:51.7227	test-rmse:396.163
[1600]	train-rmse:51.2015	test-rmse:396.145
[1610]	train-rmse:50.7084	test-rmse:396.144
[1620]	train-rmse:50.1631	test-rmse:396.148
[1630]	train-rmse:49.6608	test-rmse:396.123
[1640]	train-rmse:49.0557	test-rmse:396.146
[1650]	train-rmse:48.5729	test-rmse:396.115
[1660]	train-rmse:48.0567	test-rmse:396.102
[1670]	train-rmse:47.5104	test-rmse:396.107
[1680]	train-rmse:46.9425	test-rmse:396.096
[1690]	train-rmse:46.362	test-rmse:396.069
[1700]	train-rmse:45.8708	test-rmse:396.041
[1710]	train-rmse:45.324	test-rmse:396.011
[1720]	train-rmse:44.84	test-rmse:395.958
[1730]	train-rmse:44.5575	test-rmse:395.965
[1740]	train-rmse:44.059	test-rmse:395.97
[1750]	train-rmse:43.6527	test-rmse:395.977
[1760]	train-rmse:43.2438	test-rmse:395.981
[1770]	train-rmse:42.7898	test-rmse:396
Stopping. Best iteration:
[1720]	train-rmse:44.84	test-rmse:395.958

[474.776733, 394.336029, 464.849518, 395.958038]
[0]	train-rmse:1221.41	test-rmse:1225.58
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
[10]	train-rmse:1067.32	test-rmse:1084.57
[20]	train-rmse:933.097	test-rmse:959.302
[30]	train-rmse:816.688	test-rmse:852.929
[40]	train-rmse:726.098	test-rmse:773.267
[50]	train-rmse:648.159	test-rmse:705.596
[60]	train-rmse:584.089	test-rmse:653.257
[70]	train-rmse:534.254	test-rmse:614.446
[80]	train-rmse:493.098	test-rmse:583.169
[90]	train-rmse:453.707	test-rmse:554.307
[100]	train-rmse:422.789	test-rmse:532.762
[110]	train-rmse:398.714	test-rmse:518.45
[120]	train-rmse:376.67	test-rmse:504.198
[130]	train-rmse:357.83	test-rmse:493.271
[140]	train-rmse:342.748	test-rmse:484.449
[150]	train-rmse:328.163	test-rmse:476.441
[160]	train-rmse:315.103	test-rmse:470.043
[170]	train-rmse:304.507	test-rmse:465.14
[180]	train-rmse:294.669	test-rmse:460.621
[190]	train-rmse:286.234	test-rmse:456.894
[200]	train-rmse:278.956	test-rmse:453.95
[210]	train-rmse:271.633	test-rmse:451.04
[220]	train-rmse:264.65	test-rmse:448.223
[230]	train-rmse:258.414	test-rmse:446.066
[240]	train-rmse:253.069	test-rmse:444.317
[250]	train-rmse:247.861	test-rmse:442.523
[260]	train-rmse:243.254	test-rmse:440.796
[270]	train-rmse:238.833	test-rmse:439.396
[280]	train-rmse:234.395	test-rmse:437.895
[290]	train-rmse:231.033	test-rmse:436.705
[300]	train-rmse:227.605	test-rmse:435.886
[310]	train-rmse:224.314	test-rmse:435.206
[320]	train-rmse:221.172	test-rmse:434.417
[330]	train-rmse:217.604	test-rmse:433.234
[340]	train-rmse:214.822	test-rmse:432.777
[350]	train-rmse:211.496	test-rmse:432.12
[360]	train-rmse:208.608	test-rmse:431.574
[370]	train-rmse:205.399	test-rmse:430.983
[380]	train-rmse:202.393	test-rmse:430.406
[390]	train-rmse:199.256	test-rmse:429.766
[400]	train-rmse:196.525	test-rmse:429.15
[410]	train-rmse:193.378	test-rmse:428.508
[420]	train-rmse:191.07	test-rmse:428.062
[430]	train-rmse:188.277	test-rmse:427.718
[440]	train-rmse:186.062	test-rmse:427.413
[450]	train-rmse:183.723	test-rmse:426.955
[460]	train-rmse:181.966	test-rmse:426.691
[470]	train-rmse:179.19	test-rmse:426.12
[480]	train-rmse:177.061	test-rmse:425.763
[490]	train-rmse:174.671	test-rmse:425.49
[500]	train-rmse:172.542	test-rmse:424.933
[510]	train-rmse:170.193	test-rmse:424.588
[520]	train-rmse:168.272	test-rmse:424.472
[530]	train-rmse:166.384	test-rmse:424.251
[540]	train-rmse:164.679	test-rmse:424.033
[550]	train-rmse:162.649	test-rmse:423.858
[560]	train-rmse:160.192	test-rmse:423.685
[570]	train-rmse:158.112	test-rmse:423.528
[580]	train-rmse:156.105	test-rmse:423.355
[590]	train-rmse:154.01	test-rmse:422.996
[600]	train-rmse:152.208	test-rmse:422.668
[610]	train-rmse:150.154	test-rmse:422.546
[620]	train-rmse:148.24	test-rmse:422.42
[630]	train-rmse:146.342	test-rmse:422.268
[640]	train-rmse:144.563	test-rmse:422.086
[650]	train-rmse:143.195	test-rmse:421.979
[660]	train-rmse:141.373	test-rmse:421.847
[670]	train-rmse:139.893	test-rmse:421.805
[680]	train-rmse:138.382	test-rmse:421.661
[690]	train-rmse:136.682	test-rmse:421.573
[700]	train-rmse:134.939	test-rmse:421.381
[710]	train-rmse:133.159	test-rmse:421.366
[720]	train-rmse:131.66	test-rmse:421.273
[730]	train-rmse:129.96	test-rmse:421.17
[740]	train-rmse:128.596	test-rmse:420.985
[750]	train-rmse:126.901	test-rmse:420.873
[760]	train-rmse:125.574	test-rmse:420.765
[770]	train-rmse:123.988	test-rmse:420.5
[780]	train-rmse:122.498	test-rmse:420.418
[790]	train-rmse:120.951	test-rmse:420.297
[800]	train-rmse:119.473	test-rmse:420.095
[810]	train-rmse:117.877	test-rmse:419.984
[820]	train-rmse:116.54	test-rmse:419.841
[830]	train-rmse:115.014	test-rmse:419.775
[840]	train-rmse:113.694	test-rmse:419.753
[850]	train-rmse:112.657	test-rmse:419.661
[860]	train-rmse:111.285	test-rmse:419.478
[870]	train-rmse:110.128	test-rmse:419.402
[880]	train-rmse:108.823	test-rmse:419.425
[890]	train-rmse:107.576	test-rmse:419.329
[900]	train-rmse:106.302	test-rmse:419.368
[910]	train-rmse:105.254	test-rmse:419.32
[920]	train-rmse:104.192	test-rmse:419.243
[930]	train-rmse:102.913	test-rmse:419.116
[940]	train-rmse:101.898	test-rmse:418.987
[950]	train-rmse:100.653	test-rmse:418.92
[960]	train-rmse:99.4553	test-rmse:418.931
[970]	train-rmse:98.3237	test-rmse:418.845
[980]	train-rmse:97.3031	test-rmse:418.815
[990]	train-rmse:96.2106	test-rmse:418.735
[1000]	train-rmse:94.9755	test-rmse:418.66
[1010]	train-rmse:93.8557	test-rmse:418.619
[1020]	train-rmse:92.8281	test-rmse:418.572
[1030]	train-rmse:91.84	test-rmse:418.569
[1040]	train-rmse:90.7744	test-rmse:418.523
[1050]	train-rmse:89.9151	test-rmse:418.481
[1060]	train-rmse:88.8149	test-rmse:418.503
[1070]	train-rmse:88.0107	test-rmse:418.419
[1080]	train-rmse:86.939	test-rmse:418.38
[1090]	train-rmse:85.9556	test-rmse:418.334
[1100]	train-rmse:84.9139	test-rmse:418.232
[1110]	train-rmse:83.9822	test-rmse:418.194
[1120]	train-rmse:83.108	test-rmse:418.153
[1130]	train-rmse:82.073	test-rmse:418.053
[1140]	train-rmse:81.2074	test-rmse:418.055
[1150]	train-rmse:80.2316	test-rmse:418.039
[1160]	train-rmse:79.4253	test-rmse:418.04
[1170]	train-rmse:78.5818	test-rmse:418.057
[1180]	train-rmse:77.7197	test-rmse:418.007
[1190]	train-rmse:76.9089	test-rmse:417.95
[1200]	train-rmse:76.0556	test-rmse:417.882
[1210]	train-rmse:75.1701	test-rmse:417.837
[1220]	train-rmse:74.4298	test-rmse:417.812
[1230]	train-rmse:73.5618	test-rmse:417.843
[1240]	train-rmse:72.627	test-rmse:417.741
[1250]	train-rmse:71.8712	test-rmse:417.701
[1260]	train-rmse:71.0659	test-rmse:417.648
[1270]	train-rmse:70.1714	test-rmse:417.596
[1280]	train-rmse:69.3242	test-rmse:417.556
[1290]	train-rmse:68.5294	test-rmse:417.482
[1300]	train-rmse:67.7183	test-rmse:417.437
[1310]	train-rmse:67.0397	test-rmse:417.406
[1320]	train-rmse:66.3183	test-rmse:417.352
[1330]	train-rmse:65.5909	test-rmse:417.298
[1340]	train-rmse:64.8486	test-rmse:417.273
[1350]	train-rmse:64.147	test-rmse:417.252
[1360]	train-rmse:63.4407	test-rmse:417.216
[1370]	train-rmse:62.8436	test-rmse:417.191
[1380]	train-rmse:62.1757	test-rmse:417.215
[1390]	train-rmse:61.5254	test-rmse:417.193
[1400]	train-rmse:60.8667	test-rmse:417.206
[1410]	train-rmse:60.1375	test-rmse:417.124
[1420]	train-rmse:59.532	test-rmse:417.133
[1430]	train-rmse:58.9775	test-rmse:417.122
[1440]	train-rmse:58.336	test-rmse:417.093
[1450]	train-rmse:57.7607	test-rmse:417.09
[1460]	train-rmse:57.1279	test-rmse:417.052
[1470]	train-rmse:56.4878	test-rmse:417.041
[1480]	train-rmse:55.9151	test-rmse:417.071
[1490]	train-rmse:55.3563	test-rmse:417.068
[1500]	train-rmse:54.8452	test-rmse:417.052
[1510]	train-rmse:54.2538	test-rmse:417.036
[1520]	train-rmse:53.6426	test-rmse:416.984
[1530]	train-rmse:53.0427	test-rmse:416.969
[1540]	train-rmse:52.4482	test-rmse:416.966
[1550]	train-rmse:51.8418	test-rmse:416.933
[1560]	train-rmse:51.348	test-rmse:416.894
[1570]	train-rmse:50.7874	test-rmse:416.902
[1580]	train-rmse:50.2761	test-rmse:416.899
[1590]	train-rmse:49.7011	test-rmse:416.854
[1600]	train-rmse:49.1479	test-rmse:416.832
[1610]	train-rmse:48.6759	test-rmse:416.799
[1620]	train-rmse:48.1575	test-rmse:416.798
[1630]	train-rmse:47.7116	test-rmse:416.769
[1640]	train-rmse:47.1909	test-rmse:416.789
[1650]	train-rmse:46.7493	test-rmse:416.78
[1660]	train-rmse:46.3054	test-rmse:416.782
[1670]	train-rmse:45.8252	test-rmse:416.768
[1680]	train-rmse:45.4239	test-rmse:416.75
[1690]	train-rmse:44.9432	test-rmse:416.767
[1700]	train-rmse:44.4245	test-rmse:416.765
[1710]	train-rmse:43.9041	test-rmse:416.726
[1720]	train-rmse:43.4846	test-rmse:416.674
[1730]	train-rmse:42.9992	test-rmse:416.67
[1740]	train-rmse:42.5682	test-rmse:416.662
[1750]	train-rmse:42.1444	test-rmse:416.67
[1760]	train-rmse:41.6962	test-rmse:416.672
[1770]	train-rmse:41.2827	test-rmse:416.673
[1780]	train-rmse:40.8613	test-rmse:416.695
[1790]	train-rmse:40.4251	test-rmse:416.677
[1800]	train-rmse:40.0498	test-rmse:416.658
[1810]	train-rmse:39.6405	test-rmse:416.662
[1820]	train-rmse:39.1946	test-rmse:416.656
[1830]	train-rmse:38.7764	test-rmse:416.65
[1840]	train-rmse:38.3668	test-rmse:416.647
[1850]	train-rmse:37.9646	test-rmse:416.667
Stopping. Best iteration:
[1803]	train-rmse:39.9824	test-rmse:416.643

[474.776733, 394.336029, 464.849518, 395.958038, 416.643188]

In [14]:
# Diagnostics on the predictions stored in rv1[1] (a frame with at least the
# columns predicted_price, price, interest_cat and interest_tgt).
# NOTE(review): presumably rv1[1] holds the cross-validated train predictions
# and interest_cat == 1 is the "medium" class — confirm against the cell that
# builds rv1.
#
# All sklearn metric calls use the documented (y_true, y_pred) argument order.
# MSE and MAE are symmetric, so the reported numbers are unchanged; the order
# only matters if an asymmetric metric is swapped in later.
rv1_subset = rv1[1][rv1[1].interest_cat == 1]

# Errors restricted to the interest_cat == 1 rows.
# rmse_subset is computed for interactive inspection; it is not printed below.
rmse_subset = np.sqrt(sklearn.metrics.mean_squared_error(rv1_subset.price, rv1_subset.predicted_price))
mae_subset = sklearn.metrics.mean_absolute_error(rv1_subset.price, rv1_subset.predicted_price)

# Errors over every row of rv1[1].
mae_full = sklearn.metrics.mean_absolute_error(rv1[1].price, rv1[1].predicted_price)
rmse_full = np.sqrt(sklearn.metrics.mean_squared_error(rv1[1].price, rv1[1].predicted_price))

# Log-ratio of predicted to actual price: positive means the prediction is
# above the actual price.
ldiff = np.log(rv1[1].predicted_price) - np.log(rv1[1].price)

# Prints: mean log-ratio, correlation of the log-ratio with interest_tgt,
# subset MAE, full MAE, full RMSE.
print(ldiff.mean(), rv1[1].interest_tgt.corr(ldiff), mae_subset, mae_full, rmse_full)


-0.05722887081518133 0.336711583383 255.25826432 505.16595037 1010.71003197

In [15]:
#rv1[1].columns = ['predicted_price', 'listing_id', 'interest_tgt']

# Collapse rv1[2] along axis 0 (it is indexed per fold elsewhere in this
# notebook) into a single predicted price per test row.
df_testpreds = pd.DataFrame(rv1[2].mean(axis=0))
df_testpreds.columns = ['predicted_price']
# NOTE(review): assigning a Series aligns on index labels, not on position —
# presumably cv_test[0] carries a default 0..n-1 index; confirm.
df_testpreds['listing_id'] = cv_test[0].listing_id

# Stack the rv1[1] predictions on top of the averaged test predictions, then
# order and key the combined frame by listing_id.
df_allpreds = (
    pd.concat([rv1[1][['predicted_price', 'listing_id']], df_testpreds])
    .sort_values('listing_id')
    .set_index('listing_id')
)

# Persist the combined predictions for downstream use.
df_allpreds.to_pickle('fin-medium-price.pkl')

In [25]:
# Actual prices for every listing in train_df and test_df, keyed and ordered
# by listing_id so they can be aligned with df_allpreds below.
df_allprices = pd.concat([train_df[['listing_id', 'price']], test_df[['listing_id', 'price']]]).copy()

df_allprices.set_index('listing_id', inplace=True)
df_allprices.sort_index(inplace=True)

# log(predicted price) - log(actual price) per listing. The subtraction aligns
# on the listing_id index carried by both frames.
df_allpreds_logdiff = df_allpreds.copy()
df_allpreds_logdiff['logdiff'] = np.log(df_allpreds_logdiff.predicted_price) - np.log(df_allprices.price)
df_allpreds_logdiff.drop('predicted_price', axis=1, inplace=True)

#df_allpreds_logdiff.to_pickle('bag-submodel-medium-logdiff-r1.pkl')

# One logdiff frame per entry rv1[2][0..4], each keyed and ordered by
# listing_id.
# NOTE(review): the price comes from cv_test[0] but the listing_id from
# test_df, and both are matched to the predictions by index label rather than
# by position — presumably the two frames share the same default index;
# confirm.
df_fold = []
for f in range(5):
    df_fold.append(pd.DataFrame(rv1[2][f]))
    df_fold[-1].columns = ['predicted_price']
    df_fold[-1]['logdiff'] = np.log(df_fold[-1].predicted_price) - np.log(cv_test[0].price)
    df_fold[-1].drop('predicted_price', axis=1, inplace=True)
    
    df_fold[-1]['listing_id'] = test_df.listing_id
    df_fold[-1].sort_values('listing_id', inplace=True)
    df_fold[-1].set_index('listing_id', inplace=True)

# Write through a context manager so the file handle is flushed and closed
# deterministically instead of being left to garbage collection.
with open('model-medium-logdiff.pkl', 'wb') as out_file:
    pickle.dump((df_allpreds_logdiff, df_fold), out_file)

In [16]:
# Side-by-side frame of averaged test predictions and the price column taken
# from cv_test[0]. assign() returns a new frame, so df_testpreds is untouched.
df_tp_cmp = df_testpreds.assign(price=cv_test[0]['price'])

In [23]:
# Same comparison frame with price limited to [0, 13000], the same bounds the
# notebook applies to train_df.price near the top.
df_tp_cmp_cut = df_tp_cmp.assign(price=df_tp_cmp['price'].clip(0, 13000))

In [24]:
# RMSE of the averaged test predictions against the clipped price; left as the
# cell's last expression so the notebook displays the value.
np.sqrt(
    sklearn.metrics.mean_squared_error(
        df_tp_cmp_cut['price'],
        df_tp_cmp_cut['predicted_price'],
    )
)


Out[24]:
997.80340175096376

In [ ]:
# XXX update for final package

In [19]:
# Actual prices for every listing in train_df and test_df, keyed and ordered
# by listing_id.
df_allprices = (
    pd.concat([train_df[['listing_id', 'price']], test_df[['listing_id', 'price']]])
    .set_index('listing_id')
    .sort_index()
)

In [20]:
# log(predicted price) - log(actual price) per listing. The subtraction and
# the column assignment both align on the listing_id index of df_allpreds.
df_allpreds_logdiff = df_allpreds.assign(
    logdiff=np.log(df_allpreds.predicted_price) - np.log(df_allprices.price)
).drop('predicted_price', axis=1)

# Persist only the logdiff column (indexed by listing_id).
df_allpreds_logdiff.to_pickle('fin-submodel-medium-logdiff.pkl')