In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV, Lasso
from sklearn.metrics import mean_squared_error
import xgboost as xgb
import seaborn as sns
pd.set_option('display.max_columns', None)
%matplotlib inline
In [2]:
df = pd.read_csv("data/kaggle-house-prices/data_combined_cleaned.csv")
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2919 entries, 0 to 2918
Data columns (total 80 columns):
Id 2919 non-null int64
MSSubClass 2919 non-null int64
MSZoning 2919 non-null object
LotFrontage 2919 non-null float64
LotArea 2919 non-null int64
Street 2919 non-null object
Alley 2919 non-null object
LotShape 2919 non-null object
LandContour 2919 non-null object
LotConfig 2919 non-null object
LandSlope 2919 non-null object
Neighborhood 2919 non-null object
Condition1 2919 non-null object
Condition2 2919 non-null object
BldgType 2919 non-null object
HouseStyle 2919 non-null object
OverallQual 2919 non-null int64
OverallCond 2919 non-null int64
YearBuilt 2919 non-null int64
YearRemodAdd 2919 non-null int64
RoofStyle 2919 non-null object
RoofMatl 2919 non-null object
Exterior1st 2919 non-null object
Exterior2nd 2919 non-null object
MasVnrType 2919 non-null object
MasVnrArea 2919 non-null float64
ExterQual 2919 non-null object
ExterCond 2919 non-null object
Foundation 2919 non-null object
BsmtQual 2919 non-null object
BsmtCond 2919 non-null object
BsmtExposure 2919 non-null object
BsmtFinType1 2919 non-null object
BsmtFinSF1 2919 non-null float64
BsmtFinType2 2919 non-null object
BsmtFinSF2 2919 non-null float64
BsmtUnfSF 2919 non-null float64
TotalBsmtSF 2919 non-null float64
Heating 2919 non-null object
HeatingQC 2919 non-null object
CentralAir 2919 non-null object
Electrical 2919 non-null object
1stFlrSF 2919 non-null int64
2ndFlrSF 2919 non-null int64
LowQualFinSF 2919 non-null int64
GrLivArea 2919 non-null int64
BsmtFullBath 2919 non-null float64
BsmtHalfBath 2919 non-null float64
FullBath 2919 non-null int64
HalfBath 2919 non-null int64
BedroomAbvGr 2919 non-null int64
KitchenAbvGr 2919 non-null int64
KitchenQual 2919 non-null object
TotRmsAbvGrd 2919 non-null int64
Functional 2919 non-null object
Fireplaces 2919 non-null int64
FireplaceQu 2919 non-null object
GarageType 2919 non-null object
GarageYrBlt 2919 non-null float64
GarageFinish 2919 non-null object
GarageCars 2919 non-null float64
GarageArea 2919 non-null float64
GarageQual 2919 non-null object
GarageCond 2919 non-null object
PavedDrive 2919 non-null object
WoodDeckSF 2919 non-null int64
OpenPorchSF 2919 non-null int64
EnclosedPorch 2919 non-null int64
3SsnPorch 2919 non-null int64
ScreenPorch 2919 non-null int64
PoolArea 2919 non-null int64
PoolQC 2919 non-null object
Fence 2919 non-null object
MiscFeature 2919 non-null object
MiscVal 2919 non-null int64
MoSold 2919 non-null int64
YrSold 2919 non-null int64
SaleType 2919 non-null object
SaleCondition 2919 non-null object
SalesPrice 1460 non-null float64
dtypes: float64(12), int64(26), object(42)
memory usage: 1.8+ MB
In [3]:
df.head(10)
Out[3]:
Id
MSSubClass
MSZoning
LotFrontage
LotArea
Street
Alley
LotShape
LandContour
LotConfig
LandSlope
Neighborhood
Condition1
Condition2
BldgType
HouseStyle
OverallQual
OverallCond
YearBuilt
YearRemodAdd
RoofStyle
RoofMatl
Exterior1st
Exterior2nd
MasVnrType
MasVnrArea
ExterQual
ExterCond
Foundation
BsmtQual
BsmtCond
BsmtExposure
BsmtFinType1
BsmtFinSF1
BsmtFinType2
BsmtFinSF2
BsmtUnfSF
TotalBsmtSF
Heating
HeatingQC
CentralAir
Electrical
1stFlrSF
2ndFlrSF
LowQualFinSF
GrLivArea
BsmtFullBath
BsmtHalfBath
FullBath
HalfBath
BedroomAbvGr
KitchenAbvGr
KitchenQual
TotRmsAbvGrd
Functional
Fireplaces
FireplaceQu
GarageType
GarageYrBlt
GarageFinish
GarageCars
GarageArea
GarageQual
GarageCond
PavedDrive
WoodDeckSF
OpenPorchSF
EnclosedPorch
3SsnPorch
ScreenPorch
PoolArea
PoolQC
Fence
MiscFeature
MiscVal
MoSold
YrSold
SaleType
SaleCondition
SalesPrice
0
1
60
RL
65.0
8450
Pave
None
Reg
Lvl
Inside
Gtl
CollgCr
Norm
Norm
1Fam
2Story
7
5
2003
2003
Gable
CompShg
VinylSd
VinylSd
BrkFace
196.0
Gd
TA
PConc
Gd
TA
No
GLQ
706.0
Unf
0.0
150.0
856.0
GasA
Ex
Y
SBrkr
856
854
0
1710
1.0
0.0
2
1
3
1
Gd
8
Typ
0
None
Attchd
2003.0
RFn
2.0
548.0
TA
TA
Y
0
61
0
0
0
0
None
None
None
0
2
2008
WD
Normal
208500.0
1
2
20
RL
80.0
9600
Pave
None
Reg
Lvl
FR2
Gtl
Veenker
Feedr
Norm
1Fam
1Story
6
8
1976
1976
Gable
CompShg
MetalSd
MetalSd
None
0.0
TA
TA
CBlock
Gd
TA
Gd
ALQ
978.0
Unf
0.0
284.0
1262.0
GasA
Ex
Y
SBrkr
1262
0
0
1262
0.0
1.0
2
0
3
1
TA
6
Typ
1
TA
Attchd
1976.0
RFn
2.0
460.0
TA
TA
Y
298
0
0
0
0
0
None
None
None
0
5
2007
WD
Normal
181500.0
2
3
60
RL
68.0
11250
Pave
None
IR1
Lvl
Inside
Gtl
CollgCr
Norm
Norm
1Fam
2Story
7
5
2001
2002
Gable
CompShg
VinylSd
VinylSd
BrkFace
162.0
Gd
TA
PConc
Gd
TA
Mn
GLQ
486.0
Unf
0.0
434.0
920.0
GasA
Ex
Y
SBrkr
920
866
0
1786
1.0
0.0
2
1
3
1
Gd
6
Typ
1
TA
Attchd
2001.0
RFn
2.0
608.0
TA
TA
Y
0
42
0
0
0
0
None
None
None
0
9
2008
WD
Normal
223500.0
3
4
70
RL
60.0
9550
Pave
None
IR1
Lvl
Corner
Gtl
Crawfor
Norm
Norm
1Fam
2Story
7
5
1915
1970
Gable
CompShg
Wd Sdng
Wd Shng
None
0.0
TA
TA
BrkTil
TA
Gd
No
ALQ
216.0
Unf
0.0
540.0
756.0
GasA
Gd
Y
SBrkr
961
756
0
1717
1.0
0.0
1
0
3
1
Gd
7
Typ
1
Gd
Detchd
1998.0
Unf
3.0
642.0
TA
TA
Y
0
35
272
0
0
0
None
None
None
0
2
2006
WD
Abnorml
140000.0
4
5
60
RL
84.0
14260
Pave
None
IR1
Lvl
FR2
Gtl
NoRidge
Norm
Norm
1Fam
2Story
8
5
2000
2000
Gable
CompShg
VinylSd
VinylSd
BrkFace
350.0
Gd
TA
PConc
Gd
TA
Av
GLQ
655.0
Unf
0.0
490.0
1145.0
GasA
Ex
Y
SBrkr
1145
1053
0
2198
1.0
0.0
2
1
4
1
Gd
9
Typ
1
TA
Attchd
2000.0
RFn
3.0
836.0
TA
TA
Y
192
84
0
0
0
0
None
None
None
0
12
2008
WD
Normal
250000.0
5
6
50
RL
85.0
14115
Pave
None
IR1
Lvl
Inside
Gtl
Mitchel
Norm
Norm
1Fam
1.5Fin
5
5
1993
1995
Gable
CompShg
VinylSd
VinylSd
None
0.0
TA
TA
Wood
Gd
TA
No
GLQ
732.0
Unf
0.0
64.0
796.0
GasA
Ex
Y
SBrkr
796
566
0
1362
1.0
0.0
1
1
1
1
TA
5
Typ
0
None
Attchd
1993.0
Unf
2.0
480.0
TA
TA
Y
40
30
0
320
0
0
None
MnPrv
Shed
700
10
2009
WD
Normal
143000.0
6
7
20
RL
75.0
10084
Pave
None
Reg
Lvl
Inside
Gtl
Somerst
Norm
Norm
1Fam
1Story
8
5
2004
2005
Gable
CompShg
VinylSd
VinylSd
Stone
186.0
Gd
TA
PConc
Ex
TA
Av
GLQ
1369.0
Unf
0.0
317.0
1686.0
GasA
Ex
Y
SBrkr
1694
0
0
1694
1.0
0.0
2
0
3
1
Gd
7
Typ
1
Gd
Attchd
2004.0
RFn
2.0
636.0
TA
TA
Y
255
57
0
0
0
0
None
None
None
0
8
2007
WD
Normal
307000.0
7
8
60
RL
80.0
10382
Pave
None
IR1
Lvl
Corner
Gtl
NWAmes
PosN
Norm
1Fam
2Story
7
6
1973
1973
Gable
CompShg
HdBoard
HdBoard
Stone
240.0
TA
TA
CBlock
Gd
TA
Mn
ALQ
859.0
BLQ
32.0
216.0
1107.0
GasA
Ex
Y
SBrkr
1107
983
0
2090
1.0
0.0
2
1
3
1
TA
7
Typ
2
TA
Attchd
1973.0
RFn
2.0
484.0
TA
TA
Y
235
204
228
0
0
0
None
None
Shed
350
11
2009
WD
Normal
200000.0
8
9
50
RM
51.0
6120
Pave
None
Reg
Lvl
Inside
Gtl
OldTown
Artery
Norm
1Fam
1.5Fin
7
5
1931
1950
Gable
CompShg
BrkFace
Wd Shng
None
0.0
TA
TA
BrkTil
TA
TA
No
Unf
0.0
Unf
0.0
952.0
952.0
GasA
Gd
Y
FuseF
1022
752
0
1774
0.0
0.0
2
0
2
2
TA
8
Min1
2
TA
Detchd
1931.0
Unf
2.0
468.0
Fa
TA
Y
90
0
205
0
0
0
None
None
None
0
4
2008
WD
Abnorml
129900.0
9
10
190
RL
50.0
7420
Pave
None
Reg
Lvl
Corner
Gtl
BrkSide
Artery
Artery
2fmCon
1.5Unf
5
6
1939
1950
Gable
CompShg
MetalSd
MetalSd
None
0.0
TA
TA
BrkTil
TA
TA
No
GLQ
851.0
Unf
0.0
140.0
991.0
GasA
Ex
Y
SBrkr
1077
0
0
1077
1.0
0.0
1
0
2
2
TA
5
Typ
2
TA
Attchd
1939.0
RFn
1.0
205.0
Gd
TA
Y
0
4
0
0
0
0
None
None
None
0
1
2008
WD
Normal
118000.0
In [4]:
df_dummy = pd.get_dummies(df, drop_first=True)
df_dummy.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2919 entries, 0 to 2918
Columns: 261 entries, Id to SaleCondition_Partial
dtypes: float64(12), int64(26), uint8(223)
memory usage: 1.5 MB
In [5]:
df_training = df_dummy[~np.isnan(df.SalesPrice)]
df_testing = df_dummy[np.isnan(df.SalesPrice)]
df_training.shape, df_testing.shape
Out[5]:
((1460, 261), (1459, 261))
In [6]:
plt.subplot(1, 2, 1)
df_training.SalesPrice.hist(bins = 100)
plt.subplot(1, 2, 2)
df_training.SalesPrice.plot.box()
plt.tight_layout()
In [7]:
y = np.log(df_training.SalesPrice.values)
df_tmp = df_training.copy()
del df_tmp["SalesPrice"]
del df_tmp["Id"]
X = df_tmp.values
df_tmp.head(4)
Out[7]:
MSSubClass
LotFrontage
LotArea
OverallQual
OverallCond
YearBuilt
YearRemodAdd
MasVnrArea
BsmtFinSF1
BsmtFinSF2
BsmtUnfSF
TotalBsmtSF
1stFlrSF
2ndFlrSF
LowQualFinSF
GrLivArea
BsmtFullBath
BsmtHalfBath
FullBath
HalfBath
BedroomAbvGr
KitchenAbvGr
TotRmsAbvGrd
Fireplaces
GarageYrBlt
GarageCars
GarageArea
WoodDeckSF
OpenPorchSF
EnclosedPorch
3SsnPorch
ScreenPorch
PoolArea
MiscVal
MoSold
YrSold
MSZoning_FV
MSZoning_RH
MSZoning_RL
MSZoning_RM
Street_Pave
Alley_None
Alley_Pave
LotShape_IR2
LotShape_IR3
LotShape_Reg
LandContour_HLS
LandContour_Low
LandContour_Lvl
LotConfig_CulDSac
LotConfig_FR2
LotConfig_FR3
LotConfig_Inside
LandSlope_Mod
LandSlope_Sev
Neighborhood_Blueste
Neighborhood_BrDale
Neighborhood_BrkSide
Neighborhood_ClearCr
Neighborhood_CollgCr
Neighborhood_Crawfor
Neighborhood_Edwards
Neighborhood_Gilbert
Neighborhood_IDOTRR
Neighborhood_MeadowV
Neighborhood_Mitchel
Neighborhood_NAmes
Neighborhood_NPkVill
Neighborhood_NWAmes
Neighborhood_NoRidge
Neighborhood_NridgHt
Neighborhood_OldTown
Neighborhood_SWISU
Neighborhood_Sawyer
Neighborhood_SawyerW
Neighborhood_Somerst
Neighborhood_StoneBr
Neighborhood_Timber
Neighborhood_Veenker
Condition1_Feedr
Condition1_Norm
Condition1_PosA
Condition1_PosN
Condition1_RRAe
Condition1_RRAn
Condition1_RRNe
Condition1_RRNn
Condition2_Feedr
Condition2_Norm
Condition2_PosA
Condition2_PosN
Condition2_RRAe
Condition2_RRAn
Condition2_RRNn
BldgType_2fmCon
BldgType_Duplex
BldgType_Twnhs
BldgType_TwnhsE
HouseStyle_1.5Unf
HouseStyle_1Story
HouseStyle_2.5Fin
HouseStyle_2.5Unf
HouseStyle_2Story
HouseStyle_SFoyer
HouseStyle_SLvl
RoofStyle_Gable
RoofStyle_Gambrel
RoofStyle_Hip
RoofStyle_Mansard
RoofStyle_Shed
RoofMatl_CompShg
RoofMatl_Membran
RoofMatl_Metal
RoofMatl_Roll
RoofMatl_Tar&Grv
RoofMatl_WdShake
RoofMatl_WdShngl
Exterior1st_AsphShn
Exterior1st_BrkComm
Exterior1st_BrkFace
Exterior1st_CBlock
Exterior1st_CemntBd
Exterior1st_HdBoard
Exterior1st_ImStucc
Exterior1st_MetalSd
Exterior1st_Other
Exterior1st_Plywood
Exterior1st_Stone
Exterior1st_Stucco
Exterior1st_VinylSd
Exterior1st_Wd Sdng
Exterior1st_WdShing
Exterior2nd_AsphShn
Exterior2nd_Brk Cmn
Exterior2nd_BrkFace
Exterior2nd_CBlock
Exterior2nd_CmentBd
Exterior2nd_HdBoard
Exterior2nd_ImStucc
Exterior2nd_MetalSd
Exterior2nd_Other
Exterior2nd_Plywood
Exterior2nd_Stone
Exterior2nd_Stucco
Exterior2nd_VinylSd
Exterior2nd_Wd Sdng
Exterior2nd_Wd Shng
MasVnrType_BrkFace
MasVnrType_None
MasVnrType_Stone
ExterQual_Fa
ExterQual_Gd
ExterQual_TA
ExterCond_Fa
ExterCond_Gd
ExterCond_Po
ExterCond_TA
Foundation_CBlock
Foundation_PConc
Foundation_Slab
Foundation_Stone
Foundation_Wood
BsmtQual_Fa
BsmtQual_Gd
BsmtQual_None
BsmtQual_TA
BsmtCond_Gd
BsmtCond_None
BsmtCond_Po
BsmtCond_TA
BsmtExposure_Gd
BsmtExposure_Mn
BsmtExposure_No
BsmtExposure_None
BsmtFinType1_BLQ
BsmtFinType1_GLQ
BsmtFinType1_LwQ
BsmtFinType1_None
BsmtFinType1_Rec
BsmtFinType1_Unf
BsmtFinType2_BLQ
BsmtFinType2_GLQ
BsmtFinType2_LwQ
BsmtFinType2_None
BsmtFinType2_Rec
BsmtFinType2_Unf
Heating_GasA
Heating_GasW
Heating_Grav
Heating_OthW
Heating_Wall
HeatingQC_Fa
HeatingQC_Gd
HeatingQC_Po
HeatingQC_TA
CentralAir_Y
Electrical_FuseF
Electrical_FuseP
Electrical_Mix
Electrical_SBrkr
KitchenQual_Fa
KitchenQual_Gd
KitchenQual_TA
Functional_Maj2
Functional_Min1
Functional_Min2
Functional_Mod
Functional_Sev
Functional_Typ
FireplaceQu_Fa
FireplaceQu_Gd
FireplaceQu_None
FireplaceQu_Po
FireplaceQu_TA
GarageType_Attchd
GarageType_Basment
GarageType_BuiltIn
GarageType_CarPort
GarageType_Detchd
GarageType_None
GarageFinish_None
GarageFinish_RFn
GarageFinish_Unf
GarageQual_Fa
GarageQual_Gd
GarageQual_None
GarageQual_Po
GarageQual_TA
GarageCond_Fa
GarageCond_Gd
GarageCond_None
GarageCond_Po
GarageCond_TA
PavedDrive_P
PavedDrive_Y
PoolQC_Fa
PoolQC_Gd
PoolQC_None
Fence_GdWo
Fence_MnPrv
Fence_MnWw
Fence_None
MiscFeature_None
MiscFeature_Othr
MiscFeature_Shed
MiscFeature_TenC
SaleType_CWD
SaleType_Con
SaleType_ConLD
SaleType_ConLI
SaleType_ConLw
SaleType_New
SaleType_Oth
SaleType_WD
SaleCondition_AdjLand
SaleCondition_Alloca
SaleCondition_Family
SaleCondition_Normal
SaleCondition_Partial
0
60
65.0
8450
7
5
2003
2003
196.0
706.0
0.0
150.0
856.0
856
854
0
1710
1.0
0.0
2
1
3
1
8
0
2003.0
2.0
548.0
0
61
0
0
0
0
0
2
2008
0
0
1
0
1
1
0
0
0
1
0
0
1
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
1
0
0
0
1
0
0
0
0
1
0
1
0
0
0
0
1
0
0
0
0
0
1
0
0
1
0
0
1
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
1
0
0
0
1
0
1
0
0
0
0
0
0
1
0
0
1
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
1
0
0
0
0
1
0
1
0
0
1
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
0
1
20
80.0
9600
6
8
1976
1976
0.0
978.0
0.0
284.0
1262.0
1262
0
0
1262
0.0
1.0
2
0
3
1
6
1
1976.0
2.0
460.0
298
0
0
0
0
0
0
5
2007
0
0
1
0
1
1
0
0
0
1
0
0
1
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
1
0
0
0
1
0
0
0
1
1
0
0
0
0
0
1
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
1
0
0
0
1
0
0
1
0
0
0
0
0
1
0
0
0
0
1
1
0
0
0
0
0
0
1
0
0
0
0
0
1
0
0
0
0
1
0
1
0
0
1
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
0
2
60
68.0
11250
7
5
2001
2002
162.0
486.0
0.0
434.0
920.0
920
866
0
1786
1.0
0.0
2
1
3
1
6
1
2001.0
2.0
608.0
0
42
0
0
0
0
0
9
2008
0
0
1
0
1
1
0
0
0
0
0
0
1
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
1
0
0
0
1
0
0
0
0
1
0
1
0
0
0
0
1
0
0
0
0
0
1
0
1
0
0
0
1
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
1
0
0
0
1
0
1
0
0
0
0
0
0
1
0
0
0
0
1
1
0
0
0
0
0
0
1
0
0
0
0
0
1
0
0
0
0
1
0
1
0
0
1
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
0
3
70
60.0
9550
7
5
1915
1970
0.0
216.0
0.0
540.0
756.0
961
756
0
1717
1.0
0.0
1
0
3
1
7
1
1998.0
3.0
642.0
0
35
272
0
0
0
0
2
2006
0
0
1
0
1
1
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
1
0
0
0
1
0
0
0
1
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
1
0
0
1
0
0
0
1
0
1
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
1
0
0
0
1
0
0
0
0
1
0
0
0
0
1
0
1
0
0
1
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
In [8]:
plt.subplot(1, 2, 1)
pd.Series(y).plot.hist(bins = 100)
plt.subplot(1, 2, 2)
pd.Series(y).plot.box()
plt.tight_layout()
In [9]:
pd.DataFrame(X).describe()
Out[9]:
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
count
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.00000
1460.000000
1460.00000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.0
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
1460.000000
mean
56.897260
70.176370
10516.828082
6.099315
5.575342
1971.267808
1984.865753
103.117123
443.639726
46.549315
567.240411
1057.429452
1162.626712
346.992466
5.844521
1515.463699
0.425342
0.057534
1.565068
0.382877
2.866438
1.046575
6.517808
0.613014
1976.507534
1.767123
472.980137
94.244521
46.660274
21.954110
3.409589
15.060959
2.758904
43.489041
6.321918
2007.815753
0.044521
0.010959
0.788356
0.149315
0.995890
0.937671
0.028082
0.028082
0.006849
0.633562
0.034247
0.024658
0.897945
0.064384
0.032192
0.002740
0.720548
0.044521
0.008904
0.001370
0.010959
0.039726
0.019178
0.102740
0.034932
0.068493
0.054110
0.025342
0.011644
0.033562
0.154110
0.006164
0.05000
0.028082
0.05274
0.077397
0.017123
0.050685
0.040411
0.058904
0.017123
0.026027
0.007534
0.055479
0.863014
0.005479
0.013014
0.007534
0.017808
0.001370
0.003425
0.004110
0.989726
0.000685
0.001370
0.000685
0.000685
0.001370
0.021233
0.035616
0.029452
0.078082
0.009589
0.497260
0.005479
0.007534
0.304795
0.025342
0.044521
0.781507
0.007534
0.195890
0.004795
0.001370
0.982192
0.000685
0.000685
0.000685
0.007534
0.003425
0.004110
0.000685
0.001370
0.034247
0.000685
0.041781
0.152055
0.000685
0.150685
0.0
0.073973
0.001370
0.017123
0.352740
0.141096
0.017808
0.002055
0.004795
0.017123
0.000685
0.041096
0.141781
0.006849
0.146575
0.000685
0.097260
0.003425
0.017808
0.345205
0.134932
0.026027
0.304795
0.597260
0.087671
0.009589
0.334247
0.620548
0.019178
0.100000
0.000685
0.878082
0.434247
0.443151
0.016438
0.004110
0.002055
0.023973
0.423288
0.025342
0.444521
0.044521
0.025342
0.001370
0.897945
0.091781
0.078082
0.652740
0.026027
0.101370
0.286301
0.050685
0.025342
0.091096
0.294521
0.022603
0.009589
0.031507
0.026027
0.036986
0.860274
0.978082
0.012329
0.004795
0.001370
0.002740
0.033562
0.165068
0.000685
0.293151
0.934932
0.018493
0.002055
0.000685
0.914384
0.026712
0.401370
0.503425
0.003425
0.021233
0.023288
0.010274
0.000685
0.931507
0.022603
0.260274
0.472603
0.013699
0.214384
0.595890
0.013014
0.060274
0.006164
0.265068
0.055479
0.055479
0.289041
0.414384
0.032877
0.009589
0.055479
0.002055
0.897945
0.023973
0.006164
0.055479
0.004795
0.908219
0.020548
0.917808
0.001370
0.002055
0.995205
0.036986
0.107534
0.007534
0.807534
0.963014
0.001370
0.033562
0.000685
0.002740
0.001370
0.006164
0.003425
0.003425
0.083562
0.002055
0.867808
0.002740
0.008219
0.013699
0.820548
0.085616
std
42.300571
22.433457
9981.264932
1.382997
1.112799
30.202904
20.645407
180.731373
456.098091
161.319273
441.866955
438.705324
386.587738
436.528436
48.623081
525.480383
0.518911
0.238753
0.550916
0.502885
0.815778
0.220338
1.625393
0.644666
26.306739
0.747315
213.804841
125.338794
66.256028
61.119149
29.317331
55.757415
40.177307
496.123024
2.703626
1.328095
0.206319
0.104145
0.408614
0.356521
0.063996
0.241835
0.165264
0.165264
0.082505
0.481996
0.181924
0.155132
0.302824
0.245519
0.176570
0.052289
0.448884
0.206319
0.093973
0.036999
0.104145
0.195382
0.137198
0.303723
0.183669
0.252677
0.226311
0.157217
0.107313
0.180160
0.361177
0.078298
0.21802
0.165264
0.22359
0.267312
0.129775
0.219429
0.196989
0.235526
0.129775
0.159271
0.086502
0.228992
0.343951
0.073846
0.113372
0.086502
0.132299
0.036999
0.058440
0.063996
0.100873
0.026171
0.036999
0.026171
0.026171
0.036999
0.144209
0.185395
0.169128
0.268393
0.097486
0.500164
0.073846
0.086502
0.460478
0.157217
0.206319
0.413365
0.086502
0.397021
0.069100
0.036999
0.132299
0.026171
0.026171
0.026171
0.086502
0.058440
0.063996
0.026171
0.036999
0.181924
0.026171
0.200157
0.359197
0.026171
0.357864
0.0
0.261816
0.036999
0.129775
0.477986
0.348240
0.132299
0.045299
0.069100
0.129775
0.026171
0.198580
0.348945
0.082505
0.353803
0.026171
0.296413
0.058440
0.132299
0.475598
0.341767
0.159271
0.460478
0.490617
0.282913
0.097486
0.471888
0.485417
0.137198
0.300103
0.026171
0.327303
0.495827
0.496928
0.127198
0.063996
0.045299
0.153016
0.494249
0.157217
0.497083
0.206319
0.157217
0.036999
0.302824
0.288815
0.268393
0.476262
0.159271
0.301921
0.452187
0.219429
0.157217
0.287844
0.455983
0.148684
0.097486
0.174743
0.159271
0.188793
0.346821
0.146465
0.110386
0.069100
0.036999
0.052289
0.180160
0.371370
0.026171
0.455363
0.246731
0.134772
0.045299
0.026171
0.279893
0.161297
0.490344
0.500160
0.058440
0.144209
0.150867
0.100873
0.026171
0.252677
0.148684
0.438934
0.499420
0.116277
0.410535
0.490887
0.113372
0.238075
0.078298
0.441521
0.228992
0.228992
0.453472
0.492784
0.178375
0.097486
0.228992
0.045299
0.302824
0.153016
0.078298
0.228992
0.069100
0.288815
0.141914
0.274751
0.036999
0.045299
0.069100
0.188793
0.309897
0.086502
0.394372
0.188793
0.036999
0.180160
0.026171
0.052289
0.036999
0.078298
0.058440
0.058440
0.276824
0.045299
0.338815
0.052289
0.090317
0.116277
0.383862
0.279893
min
20.000000
21.000000
1300.000000
1.000000
1.000000
1872.000000
1950.000000
0.000000
0.000000
0.000000
0.000000
0.000000
334.000000
0.000000
0.000000
334.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
2.000000
0.000000
1872.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
2006.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.00000
0.000000
0.00000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.0
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
25%
20.000000
60.000000
7553.500000
5.000000
5.000000
1954.000000
1967.000000
0.000000
0.000000
0.000000
223.000000
795.750000
882.000000
0.000000
0.000000
1129.500000
0.000000
0.000000
1.000000
0.000000
2.000000
1.000000
5.000000
0.000000
1959.000000
1.000000
334.500000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
5.000000
2007.000000
0.000000
0.000000
1.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.00000
0.000000
0.00000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.0
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
50%
50.000000
70.000000
9478.500000
6.000000
5.000000
1973.000000
1994.000000
0.000000
383.500000
0.000000
477.500000
991.500000
1087.000000
0.000000
0.000000
1464.000000
0.000000
0.000000
2.000000
0.000000
3.000000
1.000000
6.000000
1.000000
1978.000000
2.000000
480.000000
0.000000
25.000000
0.000000
0.000000
0.000000
0.000000
0.000000
6.000000
2008.000000
0.000000
0.000000
1.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.00000
0.000000
0.00000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.0
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
75%
70.000000
80.000000
11601.500000
7.000000
6.000000
2000.000000
2004.000000
164.250000
712.250000
0.000000
808.000000
1298.250000
1391.250000
728.000000
0.000000
1776.750000
1.000000
0.000000
2.000000
1.000000
3.000000
1.000000
7.000000
1.000000
2001.000000
2.000000
576.000000
168.000000
68.000000
0.000000
0.000000
0.000000
0.000000
0.000000
8.000000
2009.000000
0.000000
0.000000
1.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.00000
0.000000
0.00000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.0
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
1.000000
1.000000
0.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
1.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
1.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
1.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
1.000000
0.000000
max
190.000000
313.000000
215245.000000
10.000000
9.000000
2010.000000
2010.000000
1600.000000
5644.000000
1474.000000
2336.000000
6110.000000
4692.000000
2065.000000
572.000000
5642.000000
3.000000
2.000000
3.000000
2.000000
8.000000
3.000000
14.000000
3.000000
2010.000000
4.000000
1418.000000
857.000000
547.000000
552.000000
508.000000
480.000000
738.000000
15500.000000
12.000000
2010.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.00000
1.000000
1.00000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
0.0
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 1)
In [11]:
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.fit_transform(X_test)
In [12]:
def rmse(y_true, y_pred):
return np.sqrt(mean_squared_error(y_true, y_pred))
In [13]:
lr = LinearRegression()
lr.fit(X_train_std, y_train)
rmse(y_test, lr.predict(X_test_std))
Out[13]:
6450791737.1554108
Seems that Linear regression model performed very poorly. Most likely it is because model finds a lot of collinearity in the data due to the categorical columns.
Test lasso, which is more robust against multi collinearity.
In [14]:
lasso = Lasso(random_state=1, max_iter=10000)
lasso.fit(X_train_std, y_train)
rmse(y_test, lasso.predict(X_test_std))
Out[14]:
0.43567069666007524
This rmse score seems reasonable. Find cross validation scores.
In [15]:
scores = cross_val_score(cv=10, estimator = lasso, scoring="neg_mean_squared_error", X=X_train_std, y = y_train)
scores = np.sqrt(-scores)
scores
Out[15]:
array([ 0.35949238, 0.3949339 , 0.36642408, 0.39862593, 0.4117694 ,
0.40598546, 0.33920185, 0.36091773, 0.34487839, 0.43522637])
In [16]:
from sklearn import linear_model
from sklearn import metrics
from sklearn import tree
from sklearn import ensemble
from sklearn import neighbors
import xgboost as xgb
rs = 1
estimatores = {
#'Linear': linear_model.LinearRegression(),
'Ridge': linear_model.Ridge(random_state=rs, max_iter=10000),
'Lasso': linear_model.Lasso(random_state=rs, max_iter=10000),
'ElasticNet': linear_model.ElasticNet(random_state=rs, max_iter=10000),
'BayesRidge': linear_model.BayesianRidge(),
'OMP': linear_model.OrthogonalMatchingPursuit(),
'DecisionTree': tree.DecisionTreeRegressor(max_depth=10, random_state=rs),
'RandomForest': ensemble.RandomForestRegressor(random_state=rs),
'KNN': neighbors.KNeighborsRegressor(n_neighbors=5),
'GradientBoostingRegressor': ensemble.GradientBoostingRegressor(n_estimators=300, max_depth=4, learning_rate=0.01, loss="ls", random_state=rs),
'xgboost': xgb.XGBRegressor(max_depth=10)
}
errvals = {}
for k in estimatores:
e = estimatores[k]
e.fit(X_train_std, y_train)
err = np.sqrt(metrics.mean_squared_error(y_test, e.predict(X_test_std)))
errvals[k] = err
result = pd.Series.from_array(errvals).sort_values()
result.plot.barh(width = 0.8)
for y, error in enumerate(result):
plt.text(x = 0.01, y = y - 0.1, s = "%.3f" % error, fontweight='bold', color = "white")
plt.title("Performance comparison of algorithms")
Out[16]:
<matplotlib.text.Text at 0x123facef0>
In [ ]:
Content source: abulbasar/machine-learning
Similar notebooks: