The aim of this competition is to predict the sale price of each property. The target variable is called price_doc in train.csv.

The training data is from August 2011 to June 2015, and the test set is from July 2015 to May 2016. The dataset also includes information about overall conditions in Russia's economy and finance sector, so you can focus on generating accurate price forecasts for individual properties, without needing to second-guess what the business cycle will do.

Data Files

train.csv, test.csv: information about individual transactions. The rows are indexed by the "id" field, which refers to individual transactions (particular properties might appear more than once, in separate transactions). These files also include supplementary information about the local area of each property. macro.csv: data on Russia's macroeconomy and financial sector (could be joined to the train and test sets on the "timestamp" column) sample_submission.csv: an example submission file in the correct format data_dictionary.txt: explanations of the fields available in the other data files


In [81]:
# Load libraries
import numpy
from numpy import arange
from matplotlib import pyplot
from pandas import read_csv
from pandas import set_option
from pandas.tools.plotting import scatter_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error
from pandasql import sqldf

pysqldf = lambda q: sqldf(q, globals()) #this line makes it easy to work with locals and globals

In [40]:
filename = 'C:/Users/usjry/Documents/GitHub/ml_practice_notebooks/housing market/train.csv'
train = read_csv(filename, header = 0)

filename = 'C:/Users/usjry/Documents/GitHub/ml_practice_notebooks/housing market/test.csv'
test = read_csv(filename, header = 0)

filename = 'C:/Users/usjry/Documents/GitHub/ml_practice_notebooks/housing market/macro.csv'
macro = read_csv(filename, header = 0)

In [45]:
print(train.head(10))


   id   timestamp  full_sq  life_sq  floor  max_floor  material  build_year  \
0   1  2011-08-20       43     27.0    4.0        NaN       NaN         NaN   
1   2  2011-08-23       34     19.0    3.0        NaN       NaN         NaN   
2   3  2011-08-27       43     29.0    2.0        NaN       NaN         NaN   
3   4  2011-09-01       89     50.0    9.0        NaN       NaN         NaN   
4   5  2011-09-05       77     77.0    4.0        NaN       NaN         NaN   
5   6  2011-09-06       67     46.0   14.0        NaN       NaN         NaN   
6   7  2011-09-08       25     14.0   10.0        NaN       NaN         NaN   
7   8  2011-09-09       44     44.0    5.0        NaN       NaN         NaN   
8   9  2011-09-10       42     27.0    5.0        NaN       NaN         NaN   
9  10  2011-09-13       36     21.0    9.0        NaN       NaN         NaN   

   num_room  kitch_sq    ...      cafe_count_5000_price_2500  \
0       NaN       NaN    ...                               9   
1       NaN       NaN    ...                              15   
2       NaN       NaN    ...                              10   
3       NaN       NaN    ...                              11   
4       NaN       NaN    ...                             319   
5       NaN       NaN    ...                              62   
6       NaN       NaN    ...                              81   
7       NaN       NaN    ...                               9   
8       NaN       NaN    ...                              19   
9       NaN       NaN    ...                              19   

  cafe_count_5000_price_4000 cafe_count_5000_price_high  \
0                          4                          0   
1                          3                          0   
2                          3                          0   
3                          2                          1   
4                        108                         17   
5                         14                          1   
6                         16                          3   
7                          4                          0   
8                          8                          1   
9                         13                          0   

   big_church_count_5000  church_count_5000  mosque_count_5000  \
0                     13                 22                  1   
1                     15                 29                  1   
2                     11                 27                  0   
3                      4                  4                  0   
4                    135                236                  2   
5                     53                 78                  1   
6                     38                 80                  1   
7                     11                 18                  1   
8                     18                 34                  1   
9                     10                 20                  1   

   leisure_count_5000  sport_count_5000  market_count_5000  price_doc  
0                   0                52                  4    5850000  
1                  10                66                 14    6000000  
2                   4                67                 10    5700000  
3                   0                26                  3   13100000  
4                  91               195                 14   16331452  
5                  20               113                 17    9100000  
6                  27               127                  8    5500000  
7                   0                47                  4    2000000  
8                   3                85                 11    5300000  
9                   3                67                  1    2000000  

[10 rows x 292 columns]

In [49]:
dataset = train

In [50]:
# Summarize Data

# Descriptive statistics
# shape
print(dataset.shape)


(30471, 292)

In [51]:
# types
print(dataset.dtypes)


id                                         int64
timestamp                                 object
full_sq                                    int64
life_sq                                  float64
floor                                    float64
max_floor                                float64
material                                 float64
build_year                               float64
num_room                                 float64
kitch_sq                                 float64
state                                    float64
product_type                              object
sub_area                                  object
area_m                                   float64
raion_popul                                int64
green_zone_part                          float64
indust_part                              float64
children_preschool                         int64
preschool_quota                          float64
preschool_education_centers_raion          int64
children_school                            int64
school_quota                             float64
school_education_centers_raion             int64
school_education_centers_top_20_raion      int64
hospital_beds_raion                      float64
healthcare_centers_raion                   int64
university_top_20_raion                    int64
sport_objects_raion                        int64
additional_education_raion                 int64
culture_objects_top_25                    object
                                          ...   
big_church_count_3000                      int64
church_count_3000                          int64
mosque_count_3000                          int64
leisure_count_3000                         int64
sport_count_3000                           int64
market_count_3000                          int64
green_part_5000                          float64
prom_part_5000                           float64
office_count_5000                          int64
office_sqm_5000                            int64
trc_count_5000                             int64
trc_sqm_5000                               int64
cafe_count_5000                            int64
cafe_sum_5000_min_price_avg              float64
cafe_sum_5000_max_price_avg              float64
cafe_avg_price_5000                      float64
cafe_count_5000_na_price                   int64
cafe_count_5000_price_500                  int64
cafe_count_5000_price_1000                 int64
cafe_count_5000_price_1500                 int64
cafe_count_5000_price_2500                 int64
cafe_count_5000_price_4000                 int64
cafe_count_5000_price_high                 int64
big_church_count_5000                      int64
church_count_5000                          int64
mosque_count_5000                          int64
leisure_count_5000                         int64
sport_count_5000                           int64
market_count_5000                          int64
price_doc                                  int64
dtype: object

In [52]:
# head
print(dataset.head(20))


    id   timestamp  full_sq  life_sq  floor  max_floor  material  build_year  \
0    1  2011-08-20       43     27.0    4.0        NaN       NaN         NaN   
1    2  2011-08-23       34     19.0    3.0        NaN       NaN         NaN   
2    3  2011-08-27       43     29.0    2.0        NaN       NaN         NaN   
3    4  2011-09-01       89     50.0    9.0        NaN       NaN         NaN   
4    5  2011-09-05       77     77.0    4.0        NaN       NaN         NaN   
5    6  2011-09-06       67     46.0   14.0        NaN       NaN         NaN   
6    7  2011-09-08       25     14.0   10.0        NaN       NaN         NaN   
7    8  2011-09-09       44     44.0    5.0        NaN       NaN         NaN   
8    9  2011-09-10       42     27.0    5.0        NaN       NaN         NaN   
9   10  2011-09-13       36     21.0    9.0        NaN       NaN         NaN   
10  11  2011-09-16       36     19.0   12.0        NaN       NaN         NaN   
11  12  2011-09-16       38     19.0   11.0        NaN       NaN         NaN   
12  13  2011-09-17       43     28.0    4.0        NaN       NaN         NaN   
13  14  2011-09-19       31     31.0    4.0        NaN       NaN         NaN   
14  15  2011-09-19       31     21.0    3.0        NaN       NaN         NaN   
15  16  2011-09-20       51     31.0   15.0        NaN       NaN         NaN   
16  17  2011-09-20       47     31.0    4.0        NaN       NaN         NaN   
17  18  2011-09-20       42     28.0    2.0        NaN       NaN         NaN   
18  19  2011-09-22       59     33.0   10.0        NaN       NaN         NaN   
19  20  2011-09-22       44     29.0    4.0        NaN       NaN         NaN   

    num_room  kitch_sq    ...      cafe_count_5000_price_2500  \
0        NaN       NaN    ...                               9   
1        NaN       NaN    ...                              15   
2        NaN       NaN    ...                              10   
3        NaN       NaN    ...                              11   
4        NaN       NaN    ...                             319   
5        NaN       NaN    ...                              62   
6        NaN       NaN    ...                              81   
7        NaN       NaN    ...                               9   
8        NaN       NaN    ...                              19   
9        NaN       NaN    ...                              19   
10       NaN       NaN    ...                               1   
11       NaN       NaN    ...                               8   
12       NaN       NaN    ...                              13   
13       NaN       NaN    ...                             254   
14       NaN       NaN    ...                              88   
15       NaN       NaN    ...                               6   
16       NaN       NaN    ...                              10   
17       NaN       NaN    ...                              32   
18       NaN       NaN    ...                               1   
19       NaN       NaN    ...                               9   

   cafe_count_5000_price_4000 cafe_count_5000_price_high  \
0                           4                          0   
1                           3                          0   
2                           3                          0   
3                           2                          1   
4                         108                         17   
5                          14                          1   
6                          16                          3   
7                           4                          0   
8                           8                          1   
9                          13                          0   
10                          1                          0   
11                          3                          0   
12                          9                          1   
13                        108                         22   
14                         19                          2   
15                          1                          0   
16                          2                          0   
17                          6                          0   
18                          1                          0   
19                          2                          0   

    big_church_count_5000  church_count_5000  mosque_count_5000  \
0                      13                 22                  1   
1                      15                 29                  1   
2                      11                 27                  0   
3                       4                  4                  0   
4                     135                236                  2   
5                      53                 78                  1   
6                      38                 80                  1   
7                      11                 18                  1   
8                      18                 34                  1   
9                      10                 20                  1   
10                      5                  9                  0   
11                     10                  9                  0   
12                      7                 15                  0   
13                     57                102                  1   
14                     63                100                  0   
15                      9                 21                  0   
16                      7                 23                  0   
17                     13                 33                  1   
18                      6                  9                  0   
19                     10                 14                  0   

    leisure_count_5000  sport_count_5000  market_count_5000  price_doc  
0                    0                52                  4    5850000  
1                   10                66                 14    6000000  
2                    4                67                 10    5700000  
3                    0                26                  3   13100000  
4                   91               195                 14   16331452  
5                   20               113                 17    9100000  
6                   27               127                  8    5500000  
7                    0                47                  4    2000000  
8                    3                85                 11    5300000  
9                    3                67                  1    2000000  
10                   2                17                  6    4650000  
11                   0                35                  4    4800000  
12                   2                47                  0    5100000  
13                  72               166                  7    5200000  
14                  28               132                 14    5000000  
15                   1                53                  9    1850000  
16                   4                62                 13    6300000  
17                  10                72                 12    5900000  
18                   2                17                  6    7900000  
19                   2                51                  5    5200000  

[20 rows x 292 columns]

In [53]:
# descriptions, change precision to 2 places
set_option('precision', 1)
print(dataset.describe())


            id  full_sq  life_sq    floor  max_floor  material  build_year  \
count  30471.0  30471.0  24088.0  30304.0    20899.0   20899.0     1.7e+04   
mean   15237.9     54.2     34.4      7.7       12.6       1.8     3.1e+03   
std     8796.5     38.0     52.3      5.3        6.8       1.5     1.5e+05   
min        1.0      0.0      0.0      0.0        0.0       1.0     0.0e+00   
25%     7620.5     38.0     20.0      3.0        9.0       1.0     2.0e+03   
50%    15238.0     49.0     30.0      6.5       12.0       1.0     2.0e+03   
75%    22855.5     63.0     43.0     11.0       17.0       2.0     2.0e+03   
max    30473.0   5326.0   7478.0     77.0      117.0       6.0     2.0e+07   

       num_room  kitch_sq    state    ...      cafe_count_5000_price_2500  \
count   20899.0   20899.0  16912.0    ...                         30471.0   
mean        1.9       6.4      2.1    ...                            32.1   
std         0.9      28.3      0.9    ...                            73.5   
min         0.0       0.0      1.0    ...                             0.0   
25%         1.0       1.0      1.0    ...                             2.0   
50%         2.0       6.0      2.0    ...                             8.0   
75%         2.0       9.0      3.0    ...                            21.0   
max        19.0    2014.0     33.0    ...                           377.0   

       cafe_count_5000_price_4000  cafe_count_5000_price_high  \
count                     30471.0                     30471.0   
mean                         10.8                         1.8   
std                          28.4                         5.4   
min                           0.0                         0.0   
25%                           1.0                         0.0   
50%                           2.0                         0.0   
75%                           5.0                         1.0   
max                         147.0                        30.0   

       big_church_count_5000  church_count_5000  mosque_count_5000  \
count                30471.0            30471.0            30471.0   
mean                    15.0               30.3                0.4   
std                     29.1               47.3                0.6   
min                      0.0                0.0                0.0   
25%                      2.0                9.0                0.0   
50%                      7.0               16.0                0.0   
75%                     12.0               28.0                1.0   
max                    151.0              250.0                2.0   

       leisure_count_5000  sport_count_5000  market_count_5000  price_doc  
count             30471.0           30471.0            30471.0    3.0e+04  
mean                  8.6              52.8                6.0    7.1e+06  
std                  20.6              46.3                4.9    4.8e+06  
min                   0.0               0.0                0.0    1.0e+05  
25%                   0.0              11.0                1.0    4.7e+06  
50%                   2.0              48.0                5.0    6.3e+06  
75%                   7.0              76.0               10.0    8.3e+06  
max                 106.0             218.0               21.0    1.1e+08  

[8 rows x 276 columns]

In [54]:
# correlation
set_option('precision', 2)
print(dataset.corr(method='pearson'))


                                             id   full_sq   life_sq     floor  \
id                                     1.00e+00  2.14e-02  1.45e-02 -2.25e-02   
full_sq                                2.14e-02  1.00e+00  1.61e-01  8.96e-02   
life_sq                                1.45e-02  1.61e-01  1.00e+00  4.51e-02   
floor                                 -2.25e-02  8.96e-02  4.51e-02  1.00e+00   
max_floor                              7.04e-03  1.19e-01  4.24e-02  4.54e-01   
material                               8.51e-03  3.29e-02  1.14e-02 -9.04e-03   
build_year                            -1.27e-02 -6.04e-03 -2.40e-03  1.19e-03   
num_room                               1.44e-02  6.95e-01  1.91e-01 -5.66e-03   
kitch_sq                              -1.04e-02  2.00e-02  6.33e-04 -8.46e-03   
state                                 -1.20e-01 -9.13e-02 -6.66e-02 -1.14e-01   
area_m                                 6.36e-02  5.63e-02  6.14e-02 -1.94e-02   
raion_popul                           -3.63e-02 -3.89e-02 -6.40e-02 -4.69e-02   
green_zone_part                        6.51e-02  3.08e-02  3.71e-02  1.06e-03   
indust_part                           -2.98e-02 -5.36e-02 -3.89e-02 -1.79e-02   
children_preschool                    -3.31e-02 -2.56e-02 -5.31e-02 -1.20e-02   
preschool_quota                        4.26e-03 -3.57e-03 -3.79e-02  5.54e-02   
preschool_education_centers_raion     -4.88e-02 -1.07e-02 -5.03e-02  1.32e-02   
children_school                       -3.15e-02 -2.43e-02 -5.16e-02 -1.65e-02   
school_quota                           6.00e-03  1.63e-02  1.01e-03  8.02e-02   
school_education_centers_raion        -4.23e-02 -1.21e-02 -5.01e-02 -6.78e-03   
school_education_centers_top_20_raion  6.98e-03  2.07e-02  5.13e-03 -9.89e-03   
hospital_beds_raion                   -2.92e-02 -1.60e-03  2.74e-03 -1.02e-01   
healthcare_centers_raion              -8.65e-03 -7.84e-03 -2.84e-02 -7.67e-02   
university_top_20_raion                1.06e-02  4.57e-02  2.49e-02  1.38e-03   
sport_objects_raion                    3.37e-04  2.81e-02 -1.79e-03 -3.58e-02   
additional_education_raion            -8.57e-03  7.40e-03 -3.37e-03  4.45e-02   
culture_objects_top_25_raion           4.14e-02  2.62e-02  3.38e-02  1.48e-02   
shopping_centers_raion                 9.47e-03  1.02e-02 -4.09e-03  1.60e-02   
office_raion                           2.91e-02  3.35e-02  3.21e-02 -1.96e-02   
full_all                              -1.70e-02  1.29e-02 -2.27e-02 -2.81e-02   
...                                         ...       ...       ...       ...   
big_church_count_3000                  1.91e-02  2.89e-02  2.56e-02 -1.65e-02   
church_count_3000                      2.04e-02  3.07e-02  2.66e-02 -1.14e-02   
mosque_count_3000                     -2.68e-02  2.36e-02  2.29e-02  6.01e-03   
leisure_count_3000                     3.02e-02  3.24e-02  3.31e-02 -8.43e-03   
sport_count_3000                      -1.23e-02  1.06e-02 -9.11e-03 -8.68e-02   
market_count_3000                     -1.16e-02 -3.53e-02 -4.03e-02 -8.15e-02   
green_part_5000                        4.17e-02  2.46e-03  2.30e-02 -3.34e-02   
prom_part_5000                         2.04e-02 -5.84e-02 -5.67e-02 -1.03e-01   
office_count_5000                      1.73e-02  3.15e-02  2.25e-02 -5.06e-02   
office_sqm_5000                        2.10e-02  2.61e-02  1.59e-02 -6.43e-02   
trc_count_5000                        -2.50e-02  6.79e-04 -2.26e-02 -7.81e-02   
trc_sqm_5000                          -1.54e-02 -6.65e-03 -2.47e-02 -8.27e-02   
cafe_count_5000                        1.48e-02  3.45e-02  2.35e-02 -4.35e-02   
cafe_sum_5000_min_price_avg            4.54e-02  4.89e-02  5.53e-02  2.90e-02   
cafe_sum_5000_max_price_avg            4.58e-02  4.82e-02  5.38e-02  3.63e-02   
cafe_avg_price_5000                    4.57e-02  4.85e-02  5.44e-02  3.34e-02   
cafe_count_5000_na_price               1.23e-02  3.31e-02  2.28e-02 -4.62e-02   
cafe_count_5000_price_500              1.39e-02  3.06e-02  2.03e-02 -5.11e-02   
cafe_count_5000_price_1000             1.22e-02  3.18e-02  1.95e-02 -4.77e-02   
cafe_count_5000_price_1500             1.40e-02  3.68e-02  2.49e-02 -3.92e-02   
cafe_count_5000_price_2500             1.86e-02  3.91e-02  2.87e-02 -3.48e-02   
cafe_count_5000_price_4000             2.30e-02  4.01e-02  3.16e-02 -2.96e-02   
cafe_count_5000_price_high             2.37e-02  4.36e-02  3.49e-02 -2.23e-02   
big_church_count_5000                  1.28e-02  2.69e-02  1.78e-02 -4.44e-02   
church_count_5000                      1.52e-02  2.86e-02  1.81e-02 -4.56e-02   
mosque_count_5000                     -4.88e-02  2.16e-02  9.95e-03 -1.23e-02   
leisure_count_5000                     2.07e-02  3.02e-02  2.27e-02 -4.42e-02   
sport_count_5000                      -1.17e-02  1.58e-03 -1.45e-02 -1.02e-01   
market_count_5000                     -2.56e-02 -4.13e-02 -5.03e-02 -1.24e-01   
price_doc                              1.21e-01  3.42e-01  1.66e-01  1.17e-01   

                                       max_floor  material  build_year  \
id                                      7.04e-03  8.51e-03   -1.27e-02   
full_sq                                 1.19e-01  3.29e-02   -6.04e-03   
life_sq                                 4.24e-02  1.14e-02   -2.40e-03   
floor                                   4.54e-01 -9.04e-03    1.19e-03   
max_floor                               1.00e+00  4.59e-02   -2.61e-04   
material                                4.59e-02  1.00e+00   -4.40e-03   
build_year                             -2.61e-04 -4.40e-03    1.00e+00   
num_room                               -1.42e-02 -2.69e-02   -8.25e-03   
kitch_sq                                2.03e-02  3.87e-02    5.96e-04   
state                                  -7.22e-02 -3.42e-02    2.95e-01   
area_m                                 -9.30e-02  1.18e-03   -4.57e-03   
raion_popul                            -1.61e-02 -4.93e-02    1.46e-03   
green_zone_part                         1.41e-02  1.82e-02   -7.21e-03   
indust_part                            -1.55e-02 -2.43e-02    1.13e-03   
children_preschool                      2.14e-02 -6.20e-02    2.45e-05   
preschool_quota                         8.49e-02 -1.43e-01   -5.14e-03   
preschool_education_centers_raion       4.20e-02 -7.86e-02   -1.08e-03   
children_school                         1.43e-02 -5.97e-02    5.78e-04   
school_quota                            9.77e-02 -1.29e-01   -2.85e-03   
school_education_centers_raion          1.20e-02 -4.78e-02   -2.46e-04   
school_education_centers_top_20_raion  -1.43e-02 -1.69e-02   -2.57e-03   
hospital_beds_raion                    -1.53e-01  8.68e-02   -8.99e-03   
healthcare_centers_raion               -7.04e-02  2.17e-02    7.75e-03   
university_top_20_raion                 8.81e-03  5.06e-02   -2.50e-03   
sport_objects_raion                    -2.15e-02  3.38e-02   -1.50e-03   
additional_education_raion              6.50e-02 -6.39e-02    2.40e-03   
culture_objects_top_25_raion            2.30e-02 -1.42e-02   -1.59e-03   
shopping_centers_raion                  2.56e-02 -5.95e-02    2.46e-03   
office_raion                           -2.06e-02  1.68e-02    5.12e-04   
full_all                               -3.83e-04  2.46e-02   -6.60e-04   
...                                          ...       ...         ...   
big_church_count_3000                  -1.96e-02  4.94e-03   -1.26e-03   
church_count_3000                      -1.56e-02  8.16e-03    7.83e-04   
mosque_count_3000                      -1.23e-02 -9.02e-03    1.42e-02   
leisure_count_3000                     -1.11e-02  1.96e-03    3.17e-05   
sport_count_3000                       -8.45e-02  7.28e-02    6.18e-03   
market_count_3000                      -8.45e-02  4.62e-02   -5.22e-03   
green_part_5000                        -5.05e-02 -1.99e-03   -4.83e-03   
prom_part_5000                         -8.20e-02  7.57e-02   -1.48e-03   
office_count_5000                      -5.54e-02  6.00e-02    4.83e-04   
office_sqm_5000                        -6.80e-02  9.13e-02    4.29e-03   
trc_count_5000                         -7.41e-02  6.56e-02    5.40e-03   
trc_sqm_5000                           -7.28e-02  6.85e-02    4.26e-03   
cafe_count_5000                        -4.74e-02  5.50e-02    1.98e-03   
cafe_sum_5000_min_price_avg            -4.00e-02  9.68e-02    3.30e-04   
cafe_sum_5000_max_price_avg            -2.68e-02  9.12e-02    3.01e-04   
cafe_avg_price_5000                    -3.20e-02  9.35e-02    3.12e-04   
cafe_count_5000_na_price               -4.72e-02  5.51e-02    7.62e-04   
cafe_count_5000_price_500              -5.37e-02  5.58e-02    1.63e-03   
cafe_count_5000_price_1000             -5.09e-02  6.00e-02    3.16e-03   
cafe_count_5000_price_1500             -4.48e-02  5.35e-02    2.57e-03   
cafe_count_5000_price_2500             -4.02e-02  5.09e-02    1.32e-03   
cafe_count_5000_price_4000             -3.37e-02  4.37e-02   -9.06e-04   
cafe_count_5000_price_high             -2.95e-02  4.44e-02    2.14e-04   
big_church_count_5000                  -4.82e-02  3.85e-02   -1.59e-03   
church_count_5000                      -4.82e-02  4.47e-02   -9.90e-05   
mosque_count_5000                      -5.83e-02  5.04e-02    1.96e-02   
leisure_count_5000                     -4.87e-02  4.45e-02   -8.30e-04   
sport_count_5000                       -9.99e-02  9.85e-02    5.98e-03   
market_count_5000                      -1.13e-01  7.64e-02    7.36e-03   
price_doc                               9.44e-02  6.40e-02    2.16e-03   

                                       num_room  kitch_sq     state  \
id                                     1.44e-02 -1.04e-02 -1.20e-01   
full_sq                                6.95e-01  2.00e-02 -9.13e-02   
life_sq                                1.91e-01  6.33e-04 -6.66e-02   
floor                                 -5.66e-03 -8.46e-03 -1.14e-01   
max_floor                             -1.42e-02  2.03e-02 -7.22e-02   
material                              -2.69e-02  3.87e-02 -3.42e-02   
build_year                            -8.25e-03  5.96e-04  2.95e-01   
num_room                               1.00e+00  1.70e-02  8.20e-02   
kitch_sq                               1.70e-02  1.00e+00  4.87e-02   
state                                  8.20e-02  4.87e-02  1.00e+00   
area_m                                -4.08e-02 -2.34e-02 -2.95e-01   
raion_popul                            7.40e-02  3.76e-02  3.74e-01   
green_zone_part                       -2.66e-02 -1.05e-02 -1.60e-01   
indust_part                           -3.67e-02  3.73e-04  5.81e-02   
children_preschool                     6.35e-02  3.48e-02  3.29e-01   
preschool_quota                       -5.71e-03  1.28e-02  9.89e-02   
preschool_education_centers_raion      7.40e-02  3.22e-02  3.14e-01   
children_school                        6.76e-02  3.36e-02  3.22e-01   
school_quota                           1.45e-02  5.08e-03  5.92e-02   
school_education_centers_raion         8.28e-02  3.40e-02  3.15e-01   
school_education_centers_top_20_raion  3.75e-02 -1.67e-03  4.65e-02   
hospital_beds_raion                    4.85e-02  1.06e-02  1.19e-01   
healthcare_centers_raion               6.18e-02  2.84e-02  2.26e-01   
university_top_20_raion                4.40e-02 -5.22e-03 -2.26e-02   
sport_objects_raion                    9.23e-02  2.01e-02  1.76e-01   
additional_education_raion             1.92e-02 -3.06e-03  4.33e-02   
culture_objects_top_25_raion           5.96e-03 -2.03e-02 -1.25e-01   
shopping_centers_raion                 6.30e-02  5.56e-03  1.20e-01   
office_raion                           4.24e-02 -1.37e-02 -5.52e-02   
full_all                               6.10e-03  1.17e-02  1.02e-01   
...                                         ...       ...       ...   
big_church_count_3000                  4.32e-02 -1.04e-02 -3.43e-02   
church_count_3000                      4.26e-02 -1.20e-02 -4.05e-02   
mosque_count_3000                      3.68e-02  1.11e-03  1.30e-02   
leisure_count_3000                     3.32e-02 -1.50e-02 -7.80e-02   
sport_count_3000                       9.47e-02  1.54e-02  1.96e-01   
market_count_3000                      4.94e-02  1.43e-02  2.11e-01   
green_part_5000                       -2.71e-02 -1.74e-02 -1.01e-01   
prom_part_5000                         1.14e-02  1.79e-02  1.77e-01   
office_count_5000                      6.52e-02 -4.64e-03  8.05e-03   
office_sqm_5000                        7.53e-02 -5.22e-04  5.04e-02   
trc_count_5000                         9.58e-02  1.81e-02  2.40e-01   
trc_sqm_5000                           8.58e-02  1.67e-02  2.31e-01   
cafe_count_5000                        6.80e-02 -4.41e-03  1.88e-02   
cafe_sum_5000_min_price_avg           -1.09e-02 -1.39e-02 -2.22e-01   
cafe_sum_5000_max_price_avg           -1.28e-02 -1.51e-02 -2.25e-01   
cafe_avg_price_5000                   -1.21e-02 -1.47e-02 -2.24e-01   
cafe_count_5000_na_price               6.77e-02 -4.24e-03  2.63e-02   
cafe_count_5000_price_500              6.79e-02 -3.63e-03  2.75e-02   
cafe_count_5000_price_1000             7.12e-02 -2.44e-03  3.56e-02   
cafe_count_5000_price_1500             6.86e-02 -4.66e-03  1.63e-02   
cafe_count_5000_price_2500             6.46e-02 -6.76e-03 -4.71e-03   
cafe_count_5000_price_4000             5.74e-02 -8.28e-03 -2.28e-02   
cafe_count_5000_price_high             5.81e-02 -9.21e-03 -3.15e-02   
big_church_count_5000                  6.09e-02 -4.00e-03  1.58e-02   
church_count_5000                      6.52e-02 -3.66e-03  2.30e-02   
mosque_count_5000                      6.28e-02  1.32e-02  9.19e-02   
leisure_count_5000                     5.87e-02 -6.26e-03 -3.88e-03   
sport_count_5000                       8.99e-02  1.69e-02  1.94e-01   
market_count_5000                      6.17e-02  2.64e-02  2.73e-01   
price_doc                              4.76e-01  2.87e-02  1.21e-01   

                                         ...      cafe_count_5000_price_2500  \
id                                       ...                        1.86e-02   
full_sq                                  ...                        3.91e-02   
life_sq                                  ...                        2.87e-02   
floor                                    ...                       -3.48e-02   
max_floor                                ...                       -4.02e-02   
material                                 ...                        5.09e-02   
build_year                               ...                        1.32e-03   
num_room                                 ...                        6.46e-02   
kitch_sq                                 ...                       -6.76e-03   
state                                    ...                       -4.71e-03   
area_m                                   ...                       -1.93e-01   
raion_popul                              ...                        1.41e-02   
green_zone_part                          ...                       -2.62e-01   
indust_part                              ...                       -1.16e-01   
children_preschool                       ...                       -1.77e-02   
preschool_quota                          ...                       -2.78e-01   
preschool_education_centers_raion        ...                        2.39e-02   
children_school                          ...                        3.52e-02   
school_quota                             ...                       -1.84e-01   
school_education_centers_raion           ...                        6.39e-02   
school_education_centers_top_20_raion    ...                        3.33e-01   
hospital_beds_raion                      ...                        1.91e-01   
healthcare_centers_raion                 ...                        3.05e-01   
university_top_20_raion                  ...                        6.96e-01   
sport_objects_raion                      ...                        6.74e-01   
additional_education_raion               ...                        4.11e-01   
culture_objects_top_25_raion             ...                        7.42e-01   
shopping_centers_raion                   ...                        5.57e-01   
office_raion                             ...                        9.02e-01   
full_all                                 ...                        3.20e-04   
...                                      ...                             ...   
big_church_count_3000                    ...                        9.09e-01   
church_count_3000                        ...                        9.15e-01   
mosque_count_3000                        ...                        5.53e-01   
leisure_count_3000                       ...                        8.93e-01   
sport_count_3000                         ...                        8.15e-01   
market_count_3000                        ...                        3.52e-01   
green_part_5000                          ...                       -3.81e-01   
prom_part_5000                           ...                        6.78e-02   
office_count_5000                        ...                        9.87e-01   
office_sqm_5000                          ...                        9.43e-01   
trc_count_5000                           ...                        7.59e-01   
trc_sqm_5000                             ...                        6.59e-01   
cafe_count_5000                          ...                        9.96e-01   
cafe_sum_5000_min_price_avg              ...                        2.10e-01   
cafe_sum_5000_max_price_avg              ...                        2.04e-01   
cafe_avg_price_5000                      ...                        2.07e-01   
cafe_count_5000_na_price                 ...                        9.88e-01   
cafe_count_5000_price_500                ...                        9.91e-01   
cafe_count_5000_price_1000               ...                        9.88e-01   
cafe_count_5000_price_1500               ...                        9.93e-01   
cafe_count_5000_price_2500               ...                        1.00e+00   
cafe_count_5000_price_4000               ...                        9.90e-01   
cafe_count_5000_price_high               ...                        9.73e-01   
big_church_count_5000                    ...                        9.61e-01   
church_count_5000                        ...                        9.70e-01   
mosque_count_5000                        ...                        5.29e-01   
leisure_count_5000                       ...                        9.86e-01   
sport_count_5000                         ...                        8.24e-01   
market_count_5000                        ...                        4.32e-01   
price_doc                                ...                        2.26e-01   

                                       cafe_count_5000_price_4000  \
id                                                       2.30e-02   
full_sq                                                  4.01e-02   
life_sq                                                  3.16e-02   
floor                                                   -2.96e-02   
max_floor                                               -3.37e-02   
material                                                 4.37e-02   
build_year                                              -9.06e-04   
num_room                                                 5.74e-02   
kitch_sq                                                -8.28e-03   
state                                                   -2.28e-02   
area_m                                                  -1.58e-01   
raion_popul                                             -1.83e-03   
green_zone_part                                         -2.40e-01   
indust_part                                             -1.27e-01   
children_preschool                                      -2.73e-02   
preschool_quota                                         -2.49e-01   
preschool_education_centers_raion                        1.67e-02   
children_school                                          2.38e-02   
school_quota                                            -1.60e-01   
school_education_centers_raion                           5.04e-02   
school_education_centers_top_20_raion                    3.23e-01   
hospital_beds_raion                                      1.72e-01   
healthcare_centers_raion                                 2.74e-01   
university_top_20_raion                                  6.74e-01   
sport_objects_raion                                      6.61e-01   
additional_education_raion                               3.98e-01   
culture_objects_top_25_raion                             7.43e-01   
shopping_centers_raion                                   5.39e-01   
office_raion                                             8.97e-01   
full_all                                                -2.30e-02   
...                                                           ...   
big_church_count_3000                                    8.97e-01   
church_count_3000                                        9.00e-01   
mosque_count_3000                                        5.30e-01   
leisure_count_3000                                       8.88e-01   
sport_count_3000                                         7.73e-01   
market_count_3000                                        3.10e-01   
green_part_5000                                         -3.66e-01   
prom_part_5000                                           3.28e-02   
office_count_5000                                        9.67e-01   
office_sqm_5000                                          9.19e-01   
trc_count_5000                                           7.11e-01   
trc_sqm_5000                                             6.22e-01   
cafe_count_5000                                          9.80e-01   
cafe_sum_5000_min_price_avg                              2.35e-01   
cafe_sum_5000_max_price_avg                              2.29e-01   
cafe_avg_price_5000                                      2.32e-01   
cafe_count_5000_na_price                                 9.77e-01   
cafe_count_5000_price_500                                9.71e-01   
cafe_count_5000_price_1000                               9.64e-01   
cafe_count_5000_price_1500                               9.76e-01   
cafe_count_5000_price_2500                               9.90e-01   
cafe_count_5000_price_4000                               1.00e+00   
cafe_count_5000_price_high                               9.81e-01   
big_church_count_5000                                    9.42e-01   
church_count_5000                                        9.45e-01   
mosque_count_5000                                        4.93e-01   
leisure_count_5000                                       9.75e-01   
sport_count_5000                                         7.79e-01   
market_count_5000                                        3.80e-01   
price_doc                                                2.10e-01   

                                       cafe_count_5000_price_high  \
id                                                       2.37e-02   
full_sq                                                  4.36e-02   
life_sq                                                  3.49e-02   
floor                                                   -2.23e-02   
max_floor                                               -2.95e-02   
material                                                 4.44e-02   
build_year                                               2.14e-04   
num_room                                                 5.81e-02   
kitch_sq                                                -9.21e-03   
state                                                   -3.15e-02   
area_m                                                  -1.55e-01   
raion_popul                                             -1.59e-02   
green_zone_part                                         -2.28e-01   
indust_part                                             -1.37e-01   
children_preschool                                      -3.65e-02   
preschool_quota                                         -2.45e-01   
preschool_education_centers_raion                        1.27e-02   
children_school                                          1.17e-02   
school_quota                                            -1.51e-01   
school_education_centers_raion                           3.68e-02   
school_education_centers_top_20_raion                    3.24e-01   
hospital_beds_raion                                      1.44e-01   
healthcare_centers_raion                                 2.48e-01   
university_top_20_raion                                  6.47e-01   
sport_objects_raion                                      6.32e-01   
additional_education_raion                               3.95e-01   
culture_objects_top_25_raion                             7.32e-01   
shopping_centers_raion                                   5.01e-01   
office_raion                                             8.68e-01   
full_all                                                -1.78e-02   
...                                                           ...   
big_church_count_3000                                    8.47e-01   
church_count_3000                                        8.61e-01   
mosque_count_3000                                        5.07e-01   
leisure_count_3000                                       8.64e-01   
sport_count_3000                                         7.48e-01   
market_count_3000                                        2.92e-01   
green_part_5000                                         -3.47e-01   
prom_part_5000                                          -1.08e-02   
office_count_5000                                        9.40e-01   
office_sqm_5000                                          9.03e-01   
trc_count_5000                                           6.65e-01   
trc_sqm_5000                                             5.85e-01   
cafe_count_5000                                          9.57e-01   
cafe_sum_5000_min_price_avg                              2.34e-01   
cafe_sum_5000_max_price_avg                              2.28e-01   
cafe_avg_price_5000                                      2.31e-01   
cafe_count_5000_na_price                                 9.52e-01   
cafe_count_5000_price_500                                9.51e-01   
cafe_count_5000_price_1000                               9.41e-01   
cafe_count_5000_price_1500                               9.49e-01   
cafe_count_5000_price_2500                               9.73e-01   
cafe_count_5000_price_4000                               9.81e-01   
cafe_count_5000_price_high                               1.00e+00   
big_church_count_5000                                    8.88e-01   
church_count_5000                                        9.00e-01   
mosque_count_5000                                        4.81e-01   
leisure_count_5000                                       9.48e-01   
sport_count_5000                                         7.53e-01   
market_count_5000                                        3.53e-01   
price_doc                                                2.14e-01   

                                       big_church_count_5000  \
id                                                  1.28e-02   
full_sq                                             2.69e-02   
life_sq                                             1.78e-02   
floor                                              -4.44e-02   
max_floor                                          -4.82e-02   
material                                            3.85e-02   
build_year                                         -1.59e-03   
num_room                                            6.09e-02   
kitch_sq                                           -4.00e-03   
state                                               1.58e-02   
area_m                                             -2.22e-01   
raion_popul                                         4.23e-02   
green_zone_part                                    -2.81e-01   
indust_part                                        -7.89e-02   
children_preschool                                  6.21e-03   
preschool_quota                                    -2.70e-01   
preschool_education_centers_raion                   4.05e-02   
children_school                                     6.05e-02   
school_quota                                       -1.93e-01   
school_education_centers_raion                      9.18e-02   
school_education_centers_top_20_raion               3.18e-01   
hospital_beds_raion                                 2.17e-01   
healthcare_centers_raion                            3.43e-01   
university_top_20_raion                             6.89e-01   
sport_objects_raion                                 6.68e-01   
additional_education_raion                          4.17e-01   
culture_objects_top_25_raion                        7.19e-01   
shopping_centers_raion                              5.95e-01   
office_raion                                        8.95e-01   
full_all                                            8.68e-03   
...                                                      ...   
big_church_count_3000                               9.46e-01   
church_count_3000                                   9.30e-01   
mosque_count_3000                                   5.34e-01   
leisure_count_3000                                  8.84e-01   
sport_count_3000                                    8.18e-01   
market_count_3000                                   3.96e-01   
green_part_5000                                    -4.08e-01   
prom_part_5000                                      1.48e-01   
office_count_5000                                   9.77e-01   
office_sqm_5000                                     9.13e-01   
trc_count_5000                                      7.97e-01   
trc_sqm_5000                                        6.93e-01   
cafe_count_5000                                     9.70e-01   
cafe_sum_5000_min_price_avg                         1.47e-01   
cafe_sum_5000_max_price_avg                         1.42e-01   
cafe_avg_price_5000                                 1.44e-01   
cafe_count_5000_na_price                            9.67e-01   
cafe_count_5000_price_500                           9.67e-01   
cafe_count_5000_price_1000                          9.68e-01   
cafe_count_5000_price_1500                          9.76e-01   
cafe_count_5000_price_2500                          9.61e-01   
cafe_count_5000_price_4000                          9.42e-01   
cafe_count_5000_price_high                          8.88e-01   
big_church_count_5000                               1.00e+00   
church_count_5000                                   9.88e-01   
mosque_count_5000                                   5.02e-01   
leisure_count_5000                                  9.69e-01   
sport_count_5000                                    8.26e-01   
market_count_5000                                   4.83e-01   
price_doc                                           1.99e-01   

                                       church_count_5000  mosque_count_5000  \
id                                              1.52e-02          -4.88e-02   
full_sq                                         2.86e-02           2.16e-02   
life_sq                                         1.81e-02           9.95e-03   
floor                                          -4.56e-02          -1.23e-02   
max_floor                                      -4.82e-02          -5.83e-02   
material                                        4.47e-02           5.04e-02   
build_year                                     -9.90e-05           1.96e-02   
num_room                                        6.52e-02           6.28e-02   
kitch_sq                                       -3.66e-03           1.32e-02   
state                                           2.30e-02           9.19e-02   
area_m                                         -2.10e-01          -8.68e-02   
raion_popul                                     4.78e-02           3.42e-03   
green_zone_part                                -2.68e-01          -2.74e-01   
indust_part                                    -1.03e-01          -8.51e-02   
children_preschool                              3.65e-03          -3.62e-02   
preschool_quota                                -2.68e-01          -2.16e-01   
preschool_education_centers_raion               4.07e-02          -6.71e-02   
children_school                                 5.89e-02           8.06e-03   
school_quota                                   -1.90e-01          -1.37e-01   
school_education_centers_raion                  9.40e-02          -3.85e-02   
school_education_centers_top_20_raion           3.30e-01           2.68e-01   
hospital_beds_raion                             2.29e-01           1.00e-01   
healthcare_centers_raion                        3.49e-01           1.45e-01   
university_top_20_raion                         7.07e-01           5.03e-01   
sport_objects_raion                             6.72e-01           3.35e-01   
additional_education_raion                      4.30e-01           1.65e-01   
culture_objects_top_25_raion                    7.26e-01           4.06e-01   
shopping_centers_raion                          5.94e-01           3.03e-01   
office_raion                                    8.95e-01           4.78e-01   
full_all                                        1.03e-02          -8.58e-02   
...                                                  ...                ...   
big_church_count_3000                           9.32e-01           4.83e-01   
church_count_3000                               9.36e-01           5.17e-01   
mosque_count_3000                               5.66e-01           6.40e-01   
leisure_count_3000                              8.89e-01           4.77e-01   
sport_count_3000                                8.35e-01           4.96e-01   
market_count_3000                               4.21e-01           1.91e-01   
green_part_5000                                -3.79e-01          -2.62e-01   
prom_part_5000                                  1.61e-01           1.18e-01   
office_count_5000                               9.83e-01           5.22e-01   
office_sqm_5000                                 9.27e-01           5.32e-01   
trc_count_5000                                  8.11e-01           5.36e-01   
trc_sqm_5000                                    6.90e-01           5.11e-01   
cafe_count_5000                                 9.80e-01           5.41e-01   
cafe_sum_5000_min_price_avg                     1.64e-01           1.63e-01   
cafe_sum_5000_max_price_avg                     1.58e-01           1.39e-01   
cafe_avg_price_5000                             1.61e-01           1.49e-01   
cafe_count_5000_na_price                        9.70e-01           5.23e-01   
cafe_count_5000_price_500                       9.79e-01           5.41e-01   
cafe_count_5000_price_1000                      9.80e-01           5.49e-01   
cafe_count_5000_price_1500                      9.83e-01           5.50e-01   
cafe_count_5000_price_2500                      9.70e-01           5.29e-01   
cafe_count_5000_price_4000                      9.45e-01           4.93e-01   
cafe_count_5000_price_high                      9.00e-01           4.81e-01   
big_church_count_5000                           9.88e-01           5.02e-01   
church_count_5000                               1.00e+00           5.50e-01   
mosque_count_5000                               5.50e-01           1.00e+00   
leisure_count_5000                              9.75e-01           4.99e-01   
sport_count_5000                                8.48e-01           5.18e-01   
market_count_5000                               5.14e-01           2.42e-01   
price_doc                                       2.13e-01           1.75e-01   

                                       leisure_count_5000  sport_count_5000  \
id                                               2.07e-02         -1.17e-02   
full_sq                                          3.02e-02          1.58e-03   
life_sq                                          2.27e-02         -1.45e-02   
floor                                           -4.42e-02         -1.02e-01   
max_floor                                       -4.87e-02         -9.99e-02   
material                                         4.45e-02          9.85e-02   
build_year                                      -8.30e-04          5.98e-03   
num_room                                         5.87e-02          8.99e-02   
kitch_sq                                        -6.26e-03          1.69e-02   
state                                           -3.88e-03          1.94e-01   
area_m                                          -1.95e-01         -4.16e-01   
raion_popul                                      1.87e-02          2.89e-01   
green_zone_part                                 -2.62e-01         -3.58e-01   
indust_part                                     -1.19e-01          1.08e-04   
children_preschool                              -2.07e-02          2.05e-01   
preschool_quota                                 -2.61e-01         -3.68e-01   
preschool_education_centers_raion                3.70e-02          2.09e-01   
children_school                                  3.21e-02          2.50e-01   
school_quota                                    -1.81e-01         -2.78e-01   
school_education_centers_raion                   7.76e-02          2.83e-01   
school_education_centers_top_20_raion            3.37e-01          3.04e-01   
hospital_beds_raion                              2.09e-01          3.19e-01   
healthcare_centers_raion                         3.14e-01          5.11e-01   
university_top_20_raion                          7.02e-01          5.94e-01   
sport_objects_raion                              6.65e-01          7.36e-01   
additional_education_raion                       4.16e-01          3.88e-01   
culture_objects_top_25_raion                     7.45e-01          5.37e-01   
shopping_centers_raion                           5.52e-01          5.62e-01   
office_raion                                     9.04e-01          7.14e-01   
full_all                                         7.71e-03          8.47e-02   
...                                                   ...               ...   
big_church_count_3000                            9.23e-01          7.28e-01   
church_count_3000                                9.27e-01          7.35e-01   
mosque_count_3000                                5.45e-01          4.73e-01   
leisure_count_3000                               9.05e-01          6.75e-01   
sport_count_3000                                 8.04e-01          9.50e-01   
market_count_3000                                3.82e-01          5.97e-01   
green_part_5000                                 -3.75e-01         -4.59e-01   
prom_part_5000                                   8.42e-02          4.11e-01   
office_count_5000                                9.81e-01          8.53e-01   
office_sqm_5000                                  9.22e-01          8.91e-01   
trc_count_5000                                   7.50e-01          9.23e-01   
trc_sqm_5000                                     6.39e-01          8.32e-01   
cafe_count_5000                                  9.85e-01          8.56e-01   
cafe_sum_5000_min_price_avg                      1.70e-01          1.79e-02   
cafe_sum_5000_max_price_avg                      1.65e-01          1.72e-02   
cafe_avg_price_5000                              1.67e-01          1.75e-02   
cafe_count_5000_na_price                         9.71e-01          8.59e-01   
cafe_count_5000_price_500                        9.81e-01          8.64e-01   
cafe_count_5000_price_1000                       9.79e-01          8.81e-01   
cafe_count_5000_price_1500                       9.84e-01          8.49e-01   
cafe_count_5000_price_2500                       9.86e-01          8.24e-01   
cafe_count_5000_price_4000                       9.75e-01          7.79e-01   
cafe_count_5000_price_high                       9.48e-01          7.53e-01   
big_church_count_5000                            9.69e-01          8.26e-01   
church_count_5000                                9.75e-01          8.48e-01   
mosque_count_5000                                4.99e-01          5.18e-01   
leisure_count_5000                               1.00e+00          8.09e-01   
sport_count_5000                                 8.09e-01          1.00e+00   
market_count_5000                                4.63e-01          7.34e-01   
price_doc                                        2.00e-01          2.95e-01   

                                       market_count_5000  price_doc  
id                                             -2.56e-02   1.21e-01  
full_sq                                        -4.13e-02   3.42e-01  
life_sq                                        -5.03e-02   1.66e-01  
floor                                          -1.24e-01   1.17e-01  
max_floor                                      -1.13e-01   9.44e-02  
material                                        7.64e-02   6.40e-02  
build_year                                      7.36e-03   2.16e-03  
num_room                                        6.17e-02   4.76e-01  
kitch_sq                                        2.64e-02   2.87e-02  
state                                           2.73e-01   1.21e-01  
area_m                                         -4.50e-01  -1.67e-01  
raion_popul                                     4.20e-01   1.52e-01  
green_zone_part                                -3.47e-01  -9.45e-02  
indust_part                                     9.85e-02  -8.26e-02  
children_preschool                              3.11e-01   1.34e-01  
preschool_quota                                -2.07e-01  -7.51e-02  
preschool_education_centers_raion               3.30e-01   1.59e-01  
children_school                                 3.45e-01   1.39e-01  
school_quota                                   -2.18e-01  -1.40e-02  
school_education_centers_raion                  4.14e-01   1.94e-01  
school_education_centers_top_20_raion           1.69e-01   1.02e-01  
hospital_beds_raion                             3.62e-01   1.47e-01  
healthcare_centers_raion                        4.76e-01   1.90e-01  
university_top_20_raion                         3.29e-01   1.66e-01  
sport_objects_raion                             5.10e-01   2.53e-01  
additional_education_raion                      2.84e-01   5.77e-02  
culture_objects_top_25_raion                    2.46e-01   4.43e-02  
shopping_centers_raion                          4.16e-01   1.51e-01  
office_raion                                    3.64e-01   1.46e-01  
full_all                                        6.46e-02   2.53e-02  
...                                                  ...        ...  
big_church_count_3000                           3.93e-01   1.32e-01  
church_count_3000                               4.12e-01   1.36e-01  
mosque_count_3000                               2.33e-01   9.62e-02  
leisure_count_3000                              3.48e-01   1.10e-01  
sport_count_3000                                6.82e-01   2.91e-01  
market_count_3000                               7.91e-01   1.52e-01  
green_part_5000                                -4.47e-01  -1.58e-01  
prom_part_5000                                  6.39e-01   8.78e-02  
office_count_5000                               4.91e-01   2.19e-01  
office_sqm_5000                                 5.68e-01   2.70e-01  
trc_count_5000                                  7.69e-01   2.89e-01  
trc_sqm_5000                                    6.50e-01   2.68e-01  
cafe_count_5000                                 4.90e-01   2.32e-01  
cafe_sum_5000_min_price_avg                    -2.25e-01   3.22e-02  
cafe_sum_5000_max_price_avg                    -2.22e-01   3.33e-02  
cafe_avg_price_5000                            -2.23e-01   3.29e-02  
cafe_count_5000_na_price                        4.87e-01   2.30e-01  
cafe_count_5000_price_500                       5.09e-01   2.27e-01  
cafe_count_5000_price_1000                      5.31e-01   2.40e-01  
cafe_count_5000_price_1500                      4.85e-01   2.33e-01  
cafe_count_5000_price_2500                      4.32e-01   2.26e-01  
cafe_count_5000_price_4000                      3.80e-01   2.10e-01  
cafe_count_5000_price_high                      3.53e-01   2.14e-01  
big_church_count_5000                           4.83e-01   1.99e-01  
church_count_5000                               5.14e-01   2.13e-01  
mosque_count_5000                               2.42e-01   1.75e-01  
leisure_count_5000                              4.63e-01   2.00e-01  
sport_count_5000                                7.34e-01   2.95e-01  
market_count_5000                               1.00e+00   1.94e-01  
price_doc                                       1.94e-01   1.00e+00  

[276 rows x 276 columns]

Bring in the macro dataset


In [56]:
dataset = pysqldf("SELECT * FROM train as t inner join macro as m on t.timestamp = m.timestamp")

In [64]:
# shape
print(dataset.shape)


(30471, 392)

In [90]:
# Split-out validation dataset
#df.ix[:, df.columns != 'b']

array = dataset.values
X = array[:,2:3]  #X = array[:,dataset.columns != 'price_doc']
Y = array[:,291]
validation_size = 0.20
seed = 7
X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size=validation_size, random_state=seed)

In [76]:
print(Y)


[5850000 6000000 5700000 ..., 6970959 13500000 5600000]

In [78]:
print(array)


[[1 '2011-08-20' 43 ..., 64.12 23587.0 230310.0]
 [2 '2011-08-23' 34 ..., 64.12 23587.0 230310.0]
 [3 '2011-08-27' 43 ..., 64.12 23587.0 230310.0]
 ..., 
 [30471 '2015-06-30' 45 ..., nan nan 234576.9]
 [30472 '2015-06-30' 64 ..., nan nan 234576.9]
 [30473 '2015-06-30' 43 ..., nan nan 234576.9]]

In [106]:
# Evaluate Algorithms
# Test options and evaluation metric
num_folds = 10
seed = 7
scoring = 'mean_squared_error'

In [107]:
# Spot Check Algorithms
models = []
models.append(('LR', LinearRegression()))
models.append(('LASSO', Lasso()))
models.append(('EN', ElasticNet()))
models.append(('KNN', KNeighborsRegressor()))
models.append(('CART', DecisionTreeRegressor()))
models.append(('SVR', SVR()))

In [108]:
# evaluate each model in turn
results = []
names = []
for name, model in models:
    kfold = KFold(n_splits=num_folds, random_state=seed)
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)


C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
LR: -34711135356222.394531 (45272989269429.421875)
LASSO: -34711134909434.074219 (45272987785504.093750)
EN: -34683327938797.511719 (45187146918796.304688)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
KNN: -16158756355476.378906 (1830174867078.940918)
CART: -15415605741976.025391 (1608718215632.856689)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
  sample_weight=sample_weight)
SVR: -24119211273959.695312 (2915251321042.989746)

In [109]:
# Compare Algorithms
fig = pyplot.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()



In [95]:
# Standardize the dataset
pipelines = []
pipelines.append(('ScaledLR', Pipeline([('Scaler', StandardScaler()),('LR', LinearRegression())])))
pipelines.append(('ScaledLASSO', Pipeline([('Scaler', StandardScaler()),('LASSO', Lasso())])))
pipelines.append(('ScaledEN', Pipeline([('Scaler', StandardScaler()),('EN', ElasticNet())])))
pipelines.append(('ScaledKNN', Pipeline([('Scaler', StandardScaler()),('KNN', KNeighborsRegressor())])))
pipelines.append(('ScaledCART', Pipeline([('Scaler', StandardScaler()),('CART', DecisionTreeRegressor())])))
pipelines.append(('ScaledSVR', Pipeline([('Scaler', StandardScaler()),('SVR', SVR())])))
results = []
names = []
for name, model in pipelines:
	kfold = KFold(n_splits=num_folds, random_state=seed)
	cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
	results.append(cv_results)
	names.append(name)
	msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
	print(msg)


C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
ScaledLR: -34711135356222.406250 (45272989269429.437500)
ScaledLASSO: -34711125816995.992188 (45272954506018.281250)
ScaledEN: -27200571220099.667969 (19544737736403.980469)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
ScaledKNN: -16152756589672.191406 (1740203435588.155518)
ScaledCART: -15371550460683.593750 (1583210166537.577881)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
ScaledSVR: -24114630241543.226562 (2915587084734.815430)

In [96]:
# Compare Algorithms
fig = pyplot.figure()
fig.suptitle('Scaled Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()



In [97]:
# KNN Algorithm tuning
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
k_values = numpy.array([1,3,5,7,9,11,13,15,17,19,21])
param_grid = dict(n_neighbors=k_values)
model = KNeighborsRegressor()
kfold = KFold(n_splits=num_folds, random_state=seed)
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=kfold)
grid_result = grid.fit(rescaledX, Y_train)


C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)

In [98]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))


Best: -14596345046410.951172 using {'n_neighbors': 21}
-26497392409359.203125 (2361332515441.965820) with: {'n_neighbors': 1}
-17622692288782.410156 (1872238336256.748047) with: {'n_neighbors': 3}
-16112985526352.214844 (1842869613360.297119) with: {'n_neighbors': 5}
-15177572996765.607422 (1559426255986.728271) with: {'n_neighbors': 7}
-14943419853364.791016 (1575532612677.548096) with: {'n_neighbors': 9}
-14978953269019.136719 (1745870368172.335449) with: {'n_neighbors': 11}
-14727470323261.951172 (1627753392377.241455) with: {'n_neighbors': 13}
-14679668617321.728516 (1646138821268.150879) with: {'n_neighbors': 15}
-14630239174528.425781 (1618557504403.530273) with: {'n_neighbors': 17}
-14639421057143.099609 (1562043429176.597168) with: {'n_neighbors': 19}
-14596345046410.951172 (1532127548398.131836) with: {'n_neighbors': 21}

In [99]:
# ensembles
ensembles = []
ensembles.append(('ScaledAB', Pipeline([('Scaler', StandardScaler()),('AB', AdaBoostRegressor())])))
ensembles.append(('ScaledGBM', Pipeline([('Scaler', StandardScaler()),('GBM', GradientBoostingRegressor())])))
ensembles.append(('ScaledRF', Pipeline([('Scaler', StandardScaler()),('RF', RandomForestRegressor())])))
ensembles.append(('ScaledET', Pipeline([('Scaler', StandardScaler()),('ET', ExtraTreesRegressor())])))
results = []
names = []
for name, model in ensembles:
	kfold = KFold(n_splits=num_folds, random_state=seed)
	cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
	results.append(cv_results)
	names.append(name)
	msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
	print(msg)


C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
ScaledAB: -15825171304182.421875 (1657486073931.083008)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
ScaledGBM: -14349558101245.152344 (1422625638623.679199)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
ScaledRF: -14613902738748.160156 (1613183452045.770264)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
ScaledET: -14864331327304.714844 (1683386353177.617920)

In [100]:
# Compare Algorithms
fig = pyplot.figure()
fig.suptitle('Scaled Ensemble Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()



In [101]:
# Tune scaled GBM
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
param_grid = dict(n_estimators=numpy.array([50,100,150,200,250,300,350,400]))
model = GradientBoostingRegressor(random_state=seed)
kfold = KFold(n_splits=num_folds, random_state=seed)
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=kfold)
grid_result = grid.fit(rescaledX, Y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))


C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
Best: -14111937354387.152344 using {'n_estimators': 50}
-14111937354387.152344 (1544746649249.499023) with: {'n_estimators': 50}
-14361387459812.384766 (1414919657662.410400) with: {'n_estimators': 100}
-14640703301797.687500 (1443128276519.089844) with: {'n_estimators': 150}
-14789902518443.281250 (1468842412825.094971) with: {'n_estimators': 200}
-14892872571699.949219 (1476677639653.514404) with: {'n_estimators': 250}
-14981067896095.742188 (1461347251429.686523) with: {'n_estimators': 300}
-15062517628067.623047 (1483039554772.742188) with: {'n_estimators': 350}
-15108379792398.576172 (1500342843831.599854) with: {'n_estimators': 400}

In [102]:
# Make predictions on validation dataset

# prepare the model
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
model = GradientBoostingRegressor(random_state=seed, n_estimators=400)
model.fit(rescaledX, Y_train)
# transform the validation dataset
rescaledValidationX = scaler.transform(X_validation)
predictions = model.predict(rescaledValidationX)
print(mean_squared_error(Y_validation, predictions))


C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
  warnings.warn(msg, _DataConversionWarning)
1.23132209471e+13

In [ ]: