In [2]:
import pandas as pd
from scipy import stats
import numpy as np # linear algebra
import seaborn as sns
import matplotlib.pyplot as plt
color = sns.color_palette()
%matplotlib inline

In [3]:
p=pd.read_csv('D:\\data\\properties_2016.csv', dtype={'hashottuborspa':'bool','propertycountylandusecode':'object','propertyzoningdesc':'object', 'fireplaceflag':'bool', 'taxdelinquencyflag':'object'})
p.head()


Out[3]:
parcelid airconditioningtypeid architecturalstyletypeid basementsqft bathroomcnt bedroomcnt buildingclasstypeid buildingqualitytypeid calculatedbathnbr decktypeid ... numberofstories fireplaceflag structuretaxvaluedollarcnt taxvaluedollarcnt assessmentyear landtaxvaluedollarcnt taxamount taxdelinquencyflag taxdelinquencyyear censustractandblock
0 10754147 NaN NaN NaN 0.0 0.0 NaN NaN NaN NaN ... NaN NaN NaN 9.0 2015.0 9.0 NaN NaN NaN NaN
1 10759547 NaN NaN NaN 0.0 0.0 NaN NaN NaN NaN ... NaN NaN NaN 27516.0 2015.0 27516.0 NaN NaN NaN NaN
2 10843547 NaN NaN NaN 0.0 0.0 NaN NaN NaN NaN ... NaN NaN 650756.0 1413387.0 2015.0 762631.0 20800.37 NaN NaN NaN
3 10859147 NaN NaN NaN 0.0 0.0 3.0 7.0 NaN NaN ... 1.0 NaN 571346.0 1156834.0 2015.0 585488.0 14557.57 NaN NaN NaN
4 10879947 NaN NaN NaN 0.0 0.0 4.0 NaN NaN NaN ... NaN NaN 193796.0 433491.0 2015.0 239695.0 5725.17 NaN NaN NaN

5 rows × 58 columns


In [4]:
p.describe()


D:\Program Files\Anaconda3\lib\site-packages\numpy\lib\function_base.py:3834: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)
Out[4]:
parcelid airconditioningtypeid architecturalstyletypeid basementsqft bathroomcnt bedroomcnt buildingclasstypeid buildingqualitytypeid calculatedbathnbr decktypeid ... yardbuildingsqft26 yearbuilt numberofstories structuretaxvaluedollarcnt taxvaluedollarcnt assessmentyear landtaxvaluedollarcnt taxamount taxdelinquencyyear censustractandblock
count 2.985217e+06 811519.000000 6061.000000 1628.000000 2.973755e+06 2.973767e+06 12629.000000 1.938488e+06 2.856305e+06 17096.0 ... 2647.000000 2.925289e+06 682069.000000 2.930235e+06 2.942667e+06 2.973778e+06 2.917484e+06 2.953967e+06 56464.000000 2.910091e+06
mean 1.332586e+07 1.931166 7.202607 646.883292 2.209143e+00 3.088949e+00 3.725948 5.784787e+00 2.299263e+00 66.0 ... 278.296562 1.964262e+03 1.401464 1.708836e+05 4.204790e+05 2.014999e+03 2.524780e+05 5.377607e+03 13.892409 6.048431e+13
std 7.909966e+06 3.148587 2.436290 538.793473 1.077754e+00 1.275859e+00 0.501700 1.805352e+00 1.000736e+00 0.0 ... 369.731508 2.344132e+01 0.539076 4.020683e+05 7.263467e+05 3.683161e-02 4.450132e+05 9.183107e+03 2.581006 3.249035e+11
min 1.071172e+07 1.000000 2.000000 20.000000 0.000000e+00 0.000000e+00 1.000000 1.000000e+00 1.000000e+00 66.0 ... 10.000000 1.801000e+03 1.000000 1.000000e+00 1.000000e+00 2.000000e+03 1.000000e+00 1.340000e+00 0.000000 -1.000000e+00
25% 1.164371e+07 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
50% 1.254509e+07 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
75% 1.409712e+07 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
max 1.696019e+08 13.000000 27.000000 8516.000000 2.000000e+01 2.000000e+01 5.000000 1.200000e+01 2.000000e+01 66.0 ... 6141.000000 2.015000e+03 41.000000 2.514860e+08 2.827860e+08 2.016000e+03 9.024622e+07 3.458861e+06 99.000000 4.830301e+14

8 rows × 53 columns


In [5]:
p.shape


Out[5]:
(2985217, 58)

In [6]:
train=pd.read_csv('D:\\data\\train_2016.csv')
train.head()


Out[6]:
parcelid logerror transactiondate
0 11016594 0.0276 2016-01-01
1 14366692 -0.1684 2016-01-01
2 12098116 -0.0040 2016-01-01
3 12643413 0.0218 2016-01-02
4 14432541 -0.0050 2016-01-02

In [7]:
train.shape


Out[7]:
(90811, 3)

In [8]:
train['transactiondate']=pd.to_datetime(train['transactiondate'], errors='coerce')
train['transaction_month']=train['transactiondate'].dt.month

In [67]:
train_df=pd.merge(train, p, on='parcelid', how='left')
train_df.head()


Out[67]:
parcelid logerror transactiondate transaction_month airconditioningtypeid architecturalstyletypeid basementsqft bathroomcnt bedroomcnt buildingclasstypeid buildingqualitytypeid calculatedbathnbr decktypeid finishedfloor1squarefeet calculatedfinishedsquarefeet finishedsquarefeet12 finishedsquarefeet13 finishedsquarefeet15 finishedsquarefeet50 finishedsquarefeet6 fips fireplacecnt fullbathcnt garagecarcnt garagetotalsqft hashottuborspa heatingorsystemtypeid latitude longitude lotsizesquarefeet ... poolsizesum pooltypeid10 pooltypeid2 pooltypeid7 propertycountylandusecode propertylandusetypeid propertyzoningdesc rawcensustractandblock regionidcity regionidcounty regionidneighborhood regionidzip roomcnt storytypeid threequarterbathnbr typeconstructiontypeid unitcnt yardbuildingsqft17 yardbuildingsqft26 yearbuilt numberofstories fireplaceflag structuretaxvaluedollarcnt taxvaluedollarcnt assessmentyear landtaxvaluedollarcnt taxamount taxdelinquencyflag taxdelinquencyyear censustractandblock
0 11016594 0.0276 2016-01-01 1 1.0 NaN NaN 2.0 3.0 NaN 4.0 2.0 NaN NaN 1684.0 1684.0 NaN NaN NaN NaN 6037.0 NaN 2.0 NaN NaN NaN 2.0 34280990.0 -118488536.0 7528.0 ... NaN NaN NaN NaN 0100 261.0 LARS 6.037107e+07 12447.0 3101.0 31817.0 96370.0 0.0 NaN NaN NaN 1.0 NaN NaN 1959.0 NaN NaN 122754.0 360170.0 2015.0 237416.0 6735.88 NaN NaN 6.037107e+13
1 14366692 -0.1684 2016-01-01 1 NaN NaN NaN 3.5 4.0 NaN NaN 3.5 NaN NaN 2263.0 2263.0 NaN NaN NaN NaN 6059.0 NaN 3.0 2.0 468.0 NaN NaN 33668120.0 -117677556.0 3643.0 ... NaN NaN NaN NaN 1 261.0 NaN 6.059052e+07 32380.0 1286.0 NaN 96962.0 0.0 NaN 1.0 NaN NaN NaN NaN 2014.0 NaN NaN 346458.0 585529.0 2015.0 239071.0 10153.02 NaN NaN NaN
2 12098116 -0.0040 2016-01-01 1 1.0 NaN NaN 3.0 2.0 NaN 4.0 3.0 NaN NaN 2217.0 2217.0 NaN NaN NaN NaN 6037.0 NaN 3.0 NaN NaN NaN 2.0 34136312.0 -118175032.0 11423.0 ... NaN NaN NaN NaN 0100 261.0 PSR6 6.037464e+07 47019.0 3101.0 275411.0 96293.0 0.0 NaN NaN NaN 1.0 NaN NaN 1940.0 NaN NaN 61994.0 119906.0 2015.0 57912.0 11484.48 NaN NaN 6.037464e+13
3 12643413 0.0218 2016-01-02 1 1.0 NaN NaN 2.0 2.0 NaN 4.0 2.0 NaN NaN 839.0 839.0 NaN NaN NaN NaN 6037.0 NaN 2.0 NaN NaN NaN 2.0 33755800.0 -118309000.0 70859.0 ... NaN NaN NaN NaN 010C 266.0 LAR3 6.037296e+07 12447.0 3101.0 54300.0 96222.0 0.0 NaN NaN NaN 1.0 NaN NaN 1987.0 NaN NaN 171518.0 244880.0 2015.0 73362.0 3048.74 NaN NaN 6.037296e+13
4 14432541 -0.0050 2016-01-02 1 NaN NaN NaN 2.5 4.0 NaN NaN 2.5 NaN NaN 2283.0 2283.0 NaN NaN NaN NaN 6059.0 NaN 2.0 2.0 598.0 NaN NaN 33485643.0 -117700234.0 6000.0 ... NaN NaN NaN 1.0 122 261.0 NaN 6.059042e+07 17686.0 1286.0 NaN 96961.0 8.0 NaN 1.0 NaN NaN NaN NaN 1981.0 2.0 NaN 169574.0 434551.0 2015.0 264977.0 5488.96 NaN NaN 6.059042e+13

5 rows × 61 columns


In [53]:
size = int(train_df.shape[0]*1)
y_test_df = train_df[size:]['logerror'].values
y_train_df = train_df[0:size]['logerror'].values

In [54]:
train_df=train_df.drop(['parcelid', 'logerror', 'transactiondate', "propertycountylandusecode", "propertyzoningdesc"], axis=1)
cat_cols = ["hashottuborspa", "fireplaceflag", "taxdelinquencyflag"]
train_df.hashottuborspa.ix[train_df.hashottuborspa==True]=1
train_df.hashottuborspa.fillna(0)
train_df.fireplaceflag.ix[train_df.fireplaceflag==True]=1
train_df.fireplaceflag.fillna(0)
train_df.taxdelinquencyflag.ix[train_df.taxdelinquencyflag=='Y']=1
train_df.taxdelinquencyflag.fillna(0)
for col in cat_cols:
    train_df[col]=train_df[col].astype('category')

mean_values=train_df.mean(axis=0)
train_df_new = train_df.fillna(mean_values, inplace=True)

x_test_df = train_df[size:]
x_train_df = train_df[0:size]


D:\Program Files\Anaconda3\lib\site-packages\pandas\core\indexing.py:132: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)

In [55]:
cat_cols = ["hashottuborspa", "fireplaceflag", "taxdelinquencyflag"]   #drop object value
train_df = train_df.drop(cat_cols, axis=1)

In [56]:
x_test_df = train_df[size:]
x_train_df = train_df[0:size]

In [65]:
from sklearn import ensemble
model = ensemble.RandomForestRegressor(n_estimators=100,n_jobs=17)
model.fit(x_train_df, y_train_df)


Out[65]:
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=100, n_jobs=17, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [58]:
re = model.predict(x_test_df)

In [60]:
for i in range(10):
    print('target:', y_test_df[i], 'predict:',re[i])


target: -0.0161 predict: 0.009848
target: 0.0218 predict: 0.00431
target: 0.0218 predict: 0.0015
target: 0.0315 predict: 0.019917
target: 0.0257 predict: 0.008933
target: 0.1345 predict: 0.025632
target: 0.008 predict: -0.006804
target: -0.5447 predict: 0.019211
target: 0.0825 predict: 0.017207
target: 0.004 predict: 0.014104

In [61]:
def rmsle(y, y_, convertExp=True):
    if convertExp:
        y = np.exp(y),
        y_ = np.exp(y_)
    log1 = np.nan_to_num(np.array([np.log(v + 1) for v in y]))
    log2 = np.nan_to_num(np.array([np.log(v + 1) for v in y_]))
    calc = (log1 - log2) ** 2
    return np.sqrt(np.mean(calc))

In [62]:
rmsle(y_test_df, re)


Out[62]:
0.091112651498839056

In [70]:
p.ix[p.parcelid==10754147]


Out[70]:
parcelid airconditioningtypeid architecturalstyletypeid basementsqft bathroomcnt bedroomcnt buildingclasstypeid buildingqualitytypeid calculatedbathnbr decktypeid finishedfloor1squarefeet calculatedfinishedsquarefeet finishedsquarefeet12 finishedsquarefeet13 finishedsquarefeet15 finishedsquarefeet50 finishedsquarefeet6 fips fireplacecnt fullbathcnt garagecarcnt garagetotalsqft hashottuborspa heatingorsystemtypeid latitude longitude lotsizesquarefeet poolcnt poolsizesum pooltypeid10 pooltypeid2 pooltypeid7 propertycountylandusecode propertylandusetypeid propertyzoningdesc rawcensustractandblock regionidcity regionidcounty regionidneighborhood regionidzip roomcnt storytypeid threequarterbathnbr typeconstructiontypeid unitcnt yardbuildingsqft17 yardbuildingsqft26 yearbuilt numberofstories fireplaceflag structuretaxvaluedollarcnt taxvaluedollarcnt assessmentyear landtaxvaluedollarcnt taxamount taxdelinquencyflag taxdelinquencyyear censustractandblock
0 10754147 NaN NaN NaN 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 6037.0 NaN NaN NaN NaN NaN NaN 34144442.0 -118654084.0 85768.0 NaN NaN NaN NaN NaN 010D 269.0 NaN 6.037800e+07 37688.0 3101.0 NaN 96337.0 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 9.0 2015.0 9.0 NaN NaN NaN NaN

In [134]:
sub_df=pd.read_csv('D:\\data\\sample_submission.csv')
sub_df.head()


Out[134]:
ParcelId 201610 201611 201612 201710 201711 201712
0 10754147 0 0 0 0 0 0
1 10759547 0 0 0 0 0 0
2 10843547 0 0 0 0 0 0
3 10859147 0 0 0 0 0 0
4 10879947 0 0 0 0 0 0

In [137]:
sub_df['parcelid']=sub_df['ParcelId']
test=pd.merge(sub_df, p, on='parcelid', how='left')

test.head()


Out[137]:
ParcelId 201610 201611 201612 201710 201711 201712 parcelid airconditioningtypeid architecturalstyletypeid basementsqft bathroomcnt bedroomcnt buildingclasstypeid buildingqualitytypeid calculatedbathnbr decktypeid finishedfloor1squarefeet calculatedfinishedsquarefeet finishedsquarefeet12 finishedsquarefeet13 finishedsquarefeet15 finishedsquarefeet50 finishedsquarefeet6 fips fireplacecnt fullbathcnt garagecarcnt garagetotalsqft hashottuborspa ... poolsizesum pooltypeid10 pooltypeid2 pooltypeid7 propertycountylandusecode propertylandusetypeid propertyzoningdesc rawcensustractandblock regionidcity regionidcounty regionidneighborhood regionidzip roomcnt storytypeid threequarterbathnbr typeconstructiontypeid unitcnt yardbuildingsqft17 yardbuildingsqft26 yearbuilt numberofstories fireplaceflag structuretaxvaluedollarcnt taxvaluedollarcnt assessmentyear landtaxvaluedollarcnt taxamount taxdelinquencyflag taxdelinquencyyear censustractandblock
0 10754147 0 0 0 0 0 0 10754147 NaN NaN NaN 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 6037.0 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN 010D 269.0 NaN 6.037800e+07 37688.0 3101.0 NaN 96337.0 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 9.0 2015.0 9.0 NaN NaN NaN NaN
1 10759547 0 0 0 0 0 0 10759547 NaN NaN NaN 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 6037.0 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN 0109 261.0 LCA11* 6.037800e+07 37688.0 3101.0 NaN 96337.0 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 27516.0 2015.0 27516.0 NaN NaN NaN NaN
2 10843547 0 0 0 0 0 0 10843547 NaN NaN NaN 0.0 0.0 NaN NaN NaN NaN NaN 73026.0 NaN NaN 73026.0 NaN NaN 6037.0 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN 1200 47.0 LAC2 6.037703e+07 51617.0 3101.0 NaN 96095.0 0.0 NaN NaN NaN 2.0 NaN NaN NaN NaN NaN 650756.0 1413387.0 2015.0 762631.0 20800.37 NaN NaN NaN
3 10859147 0 0 0 0 0 0 10859147 NaN NaN NaN 0.0 0.0 3.0 7.0 NaN NaN NaN 5068.0 NaN NaN 5068.0 NaN NaN 6037.0 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN 1200 47.0 LAC2 6.037141e+07 12447.0 3101.0 27080.0 96424.0 0.0 NaN NaN NaN NaN NaN NaN 1948.0 1.0 NaN 571346.0 1156834.0 2015.0 585488.0 14557.57 NaN NaN NaN
4 10879947 0 0 0 0 0 0 10879947 NaN NaN NaN 0.0 0.0 4.0 NaN NaN NaN NaN 1776.0 NaN NaN 1776.0 NaN NaN 6037.0 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN 1210 31.0 LAM1 6.037123e+07 12447.0 3101.0 46795.0 96450.0 0.0 NaN NaN NaN 1.0 NaN NaN 1947.0 NaN NaN 193796.0 433491.0 2015.0 239695.0 5725.17 NaN NaN NaN

5 rows × 65 columns


In [135]:
sub_df.shape


Out[135]:
(2985217, 7)

In [138]:
drop_month=['201610','201611','201612','201710','201711','201712']
test=test.drop(['parcelid', "propertycountylandusecode", "propertyzoningdesc"]+drop_month, axis=1)
cat_cols = ["hashottuborspa", "fireplaceflag", "taxdelinquencyflag"]
test.hashottuborspa.ix[test.hashottuborspa==True]=1
test.hashottuborspa.fillna(0)
test.fireplaceflag.ix[test.fireplaceflag==True]=1
test.fireplaceflag.fillna(0)
test.taxdelinquencyflag.ix[test.taxdelinquencyflag=='Y']=1
test.taxdelinquencyflag.fillna(0)
for col in cat_cols:
    test[col]=test[col].astype('category')


D:\Program Files\Anaconda3\lib\site-packages\pandas\core\indexing.py:132: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)

In [139]:
mean=test.mean(axis=0)
test.fillna(mean, inplace=True)
test.shape


Out[139]:
(2985217, 56)

In [140]:
test = test.drop(cat_cols, axis=1)

In [147]:
test=test.drop(['ParcelId'],axis=1)
test.head()


Out[147]:
airconditioningtypeid architecturalstyletypeid basementsqft bathroomcnt bedroomcnt buildingclasstypeid buildingqualitytypeid calculatedbathnbr decktypeid finishedfloor1squarefeet calculatedfinishedsquarefeet finishedsquarefeet12 finishedsquarefeet13 finishedsquarefeet15 finishedsquarefeet50 finishedsquarefeet6 fips fireplacecnt fullbathcnt garagecarcnt garagetotalsqft heatingorsystemtypeid latitude longitude lotsizesquarefeet poolcnt poolsizesum pooltypeid10 pooltypeid2 pooltypeid7 propertylandusetypeid rawcensustractandblock regionidcity regionidcounty regionidneighborhood regionidzip roomcnt storytypeid threequarterbathnbr typeconstructiontypeid unitcnt yardbuildingsqft17 yardbuildingsqft26 yearbuilt numberofstories structuretaxvaluedollarcnt taxvaluedollarcnt assessmentyear landtaxvaluedollarcnt taxamount taxdelinquencyyear censustractandblock transaction_month
0 1.931166 7.202607 646.883292 0.0 0.0 3.725948 5.784787 2.299263 66.0 1380.630396 1827.162124 1760.000608 1178.900678 2739.187235 1388.944578 2414.339439 6037.0 1.16871 2.244165 1.823517 383.769357 4.012053 34144442.0 -118654084.0 85768.0 1.0 519.71098 1.0 1.0 1.0 269.0 6.037800e+07 37688.0 3101.0 193476.407415 96337.0 0.0 7.0 1.010009 5.999555 1.181171 319.803397 278.296562 1964.261641 1.401464 170883.577166 9.0 2015.0 9.0 5377.607139 13.892409 6.048431e+13 10
1 1.931166 7.202607 646.883292 0.0 0.0 3.725948 5.784787 2.299263 66.0 1380.630396 1827.162124 1760.000608 1178.900678 2739.187235 1388.944578 2414.339439 6037.0 1.16871 2.244165 1.823517 383.769357 4.012053 34140430.0 -118625364.0 4083.0 1.0 519.71098 1.0 1.0 1.0 261.0 6.037800e+07 37688.0 3101.0 193476.407415 96337.0 0.0 7.0 1.010009 5.999555 1.181171 319.803397 278.296562 1964.261641 1.401464 170883.577166 27516.0 2015.0 27516.0 5377.607139 13.892409 6.048431e+13 10
2 1.931166 7.202607 646.883292 0.0 0.0 3.725948 5.784787 2.299263 66.0 1380.630396 73026.000000 1760.000608 1178.900678 73026.000000 1388.944578 2414.339439 6037.0 1.16871 2.244165 1.823517 383.769357 4.012053 33989359.0 -118394633.0 63085.0 1.0 519.71098 1.0 1.0 1.0 47.0 6.037703e+07 51617.0 3101.0 193476.407415 96095.0 0.0 7.0 1.010009 5.999555 2.000000 319.803397 278.296562 1964.261641 1.401464 650756.000000 1413387.0 2015.0 762631.0 20800.370000 13.892409 6.048431e+13 10
3 1.931166 7.202607 646.883292 0.0 0.0 3.000000 7.000000 2.299263 66.0 1380.630396 5068.000000 1760.000608 1178.900678 5068.000000 1388.944578 2414.339439 6037.0 1.16871 2.244165 1.823517 383.769357 4.012053 34148863.0 -118437206.0 7521.0 1.0 519.71098 1.0 1.0 1.0 47.0 6.037141e+07 12447.0 3101.0 27080.000000 96424.0 0.0 7.0 1.010009 5.999555 1.181171 319.803397 278.296562 1948.000000 1.000000 571346.000000 1156834.0 2015.0 585488.0 14557.570000 13.892409 6.048431e+13 10
4 1.931166 7.202607 646.883292 0.0 0.0 4.000000 5.784787 2.299263 66.0 1380.630396 1776.000000 1760.000608 1178.900678 1776.000000 1388.944578 2414.339439 6037.0 1.16871 2.244165 1.823517 383.769357 4.012053 34194168.0 -118385816.0 8512.0 1.0 519.71098 1.0 1.0 1.0 31.0 6.037123e+07 12447.0 3101.0 46795.000000 96450.0 0.0 7.0 1.010009 5.999555 1.000000 319.803397 278.296562 1947.000000 1.401464 193796.000000 433491.0 2015.0 239695.0 5725.170000 13.892409 6.048431e+13 10

In [148]:
month_pridict=[10,11,12,22,23,24]
result=[]
for m in month_pridict:
    test['transaction_month']=m
    re = model.predict(test)
    result.append(re)

In [158]:
sub = pd.read_csv('D:\\data\\sample_submission.csv')
for i in range(len(drop_month)):
    sub[drop_month[i]]=result[i]

In [153]:
#out=pd.DataFrame(X,columns=drop_month, index=sub_df['parcelid'].values)
#out.head()
#out.to_csv("D:\\data\\1.csv")


Out[153]:
201610 201611 201612 201710 201711 201712
10754147 -0.123942 0.542828 0.437484 0.455293 0.488694 0.488966
10759547 0.479547 0.479818 -0.123942 0.487762 0.488694 -0.123942
10843547 0.491397 0.396038 -0.123942 -0.123942 0.035097 0.533572
10859147 0.519695 0.489749 0.485059 0.540942 0.491397 0.491397
10879947 0.508219 -0.123942 0.435985 0.457975 0.484786 0.429386

In [161]:
sub.to_csv("D:\\data\\1.csv", index=False, float_format='%.4f')

In [ ]: