In [1]:

    
# Exploratory data analysis for a house-price regression example using the TensorFlow library.

# Author: Alaa Awad
# Project: https://www.kaggle.com/c/house-prices-advanced-regression-techniques



In [2]:

    
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib

import matplotlib.pyplot as plt
from scipy.stats import skew
from scipy.stats.stats import pearsonr

%config InlineBackend.figure_format = 'png' #set 'png' here when working on notebook
%matplotlib inline



In [3]:

    
# Read the training and test splits from their gzip-compressed CSV files.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/train.csv.gz', '../input/test.csv.gz')
)



In [4]:

    
# Preview the first rows of the training frame (the last expression is rendered as the cell output).
train.head()









    Out[4]:






  
    
      
      Id
      MSSubClass
      MSZoning
      LotFrontage
      LotArea
      Street
      Alley
      LotShape
      LandContour
      Utilities
      ...
      PoolArea
      PoolQC
      Fence
      MiscFeature
      MiscVal
      MoSold
      YrSold
      SaleType
      SaleCondition
      SalePrice
    
  
  
    
      0
      1
      60
      RL
      65.0
      8450
      Pave
      NaN
      Reg
      Lvl
      AllPub
      ...
      0
      NaN
      NaN
      NaN
      0
      2
      2008
      WD
      Normal
      208500
    
    
      1
      2
      20
      RL
      80.0
      9600
      Pave
      NaN
      Reg
      Lvl
      AllPub
      ...
      0
      NaN
      NaN
      NaN
      0
      5
      2007
      WD
      Normal
      181500
    
    
      2
      3
      60
      RL
      68.0
      11250
      Pave
      NaN
      IR1
      Lvl
      AllPub
      ...
      0
      NaN
      NaN
      NaN
      0
      9
      2008
      WD
      Normal
      223500
    
    
      3
      4
      70
      RL
      60.0
      9550
      Pave
      NaN
      IR1
      Lvl
      AllPub
      ...
      0
      NaN
      NaN
      NaN
      0
      2
      2006
      WD
      Abnorml
      140000
    
    
      4
      5
      60
      RL
      84.0
      14260
      Pave
      NaN
      IR1
      Lvl
      AllPub
      ...
      0
      NaN
      NaN
      NaN
      0
      12
      2008
      WD
      Normal
      250000
    
  

5 rows × 81 columns



In [7]:

    
# Keep the target aside, then stack the columns MSSubClass..SaleCondition of
# the training and test frames into one table (training rows first).
labels = train['SalePrice']
all_data = pd.concat(
    list(split.loc[:, 'MSSubClass':'SaleCondition'] for split in (train, test))
)



In [8]:

    
# Preview the first rows of the combined train + test feature table.
all_data.head()









    Out[8]:






  
    
      
      MSSubClass
      MSZoning
      LotFrontage
      LotArea
      Street
      Alley
      LotShape
      LandContour
      Utilities
      LotConfig
      ...
      ScreenPorch
      PoolArea
      PoolQC
      Fence
      MiscFeature
      MiscVal
      MoSold
      YrSold
      SaleType
      SaleCondition
    
  
  
    
      0
      60
      RL
      65.0
      8450
      Pave
      NaN
      Reg
      Lvl
      AllPub
      Inside
      ...
      0
      0
      NaN
      NaN
      NaN
      0
      2
      2008
      WD
      Normal
    
    
      1
      20
      RL
      80.0
      9600
      Pave
      NaN
      Reg
      Lvl
      AllPub
      FR2
      ...
      0
      0
      NaN
      NaN
      NaN
      0
      5
      2007
      WD
      Normal
    
    
      2
      60
      RL
      68.0
      11250
      Pave
      NaN
      IR1
      Lvl
      AllPub
      Inside
      ...
      0
      0
      NaN
      NaN
      NaN
      0
      9
      2008
      WD
      Normal
    
    
      3
      70
      RL
      60.0
      9550
      Pave
      NaN
      IR1
      Lvl
      AllPub
      Corner
      ...
      0
      0
      NaN
      NaN
      NaN
      0
      2
      2006
      WD
      Abnorml
    
    
      4
      60
      RL
      84.0
      14260
      Pave
      NaN
      IR1
      Lvl
      AllPub
      FR2
      ...
      0
      0
      NaN
      NaN
      NaN
      0
      12
      2008
      WD
      Normal
    
  

5 rows × 79 columns



In [8]:

    
# Summary statistics (count, mean, std, quartiles, ...) of the combined feature table.
all_data.describe()









    Out[8]:






  
    
      
      MSSubClass
      LotFrontage
      LotArea
      OverallQual
      OverallCond
      YearBuilt
      YearRemodAdd
      MasVnrArea
      BsmtFinSF1
      BsmtFinSF2
      ...
      GarageArea
      WoodDeckSF
      OpenPorchSF
      EnclosedPorch
      3SsnPorch
      ScreenPorch
      PoolArea
      MiscVal
      MoSold
      YrSold
    
  
  
    
      count
      2919.000000
      2433.000000
      2919.000000
      2919.000000
      2919.000000
      2919.000000
      2919.000000
      2896.000000
      2918.000000
      2918.000000
      ...
      2918.000000
      2919.000000
      2919.000000
      2919.000000
      2919.000000
      2919.000000
      2919.000000
      2919.000000
      2919.000000
      2919.000000
    
    
      mean
      57.137718
      69.305795
      10168.114080
      6.089072
      5.564577
      1971.312778
      1984.264474
      102.201312
      441.423235
      49.582248
      ...
      472.874572
      93.709832
      47.486811
      23.098321
      2.602261
      16.062350
      2.251799
      50.825968
      6.213087
      2007.792737
    
    
      std
      42.517628
      23.344905
      7886.996359
      1.409947
      1.113131
      30.291442
      20.894344
      179.334253
      455.610826
      169.205611
      ...
      215.394815
      126.526589
      67.575493
      64.244246
      25.188169
      56.184365
      35.663946
      567.402211
      2.714762
      1.314964
    
    
      min
      20.000000
      21.000000
      1300.000000
      1.000000
      1.000000
      1872.000000
      1950.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      1.000000
      2006.000000
    
    
      25%
      20.000000
      59.000000
      7478.000000
      5.000000
      5.000000
      1953.500000
      1965.000000
      0.000000
      0.000000
      0.000000
      ...
      320.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      4.000000
      2007.000000
    
    
      50%
      50.000000
      68.000000
      9453.000000
      6.000000
      5.000000
      1973.000000
      1993.000000
      0.000000
      368.500000
      0.000000
      ...
      480.000000
      0.000000
      26.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      6.000000
      2008.000000
    
    
      75%
      70.000000
      80.000000
      11570.000000
      7.000000
      6.000000
      2001.000000
      2004.000000
      164.000000
      733.000000
      0.000000
      ...
      576.000000
      168.000000
      70.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      8.000000
      2009.000000
    
    
      max
      190.000000
      313.000000
      215245.000000
      10.000000
      9.000000
      2010.000000
      2010.000000
      1600.000000
      5644.000000
      1526.000000
      ...
      1488.000000
      1424.000000
      742.000000
      1012.000000
      508.000000
      576.000000
      800.000000
      17000.000000
      12.000000
      2010.000000
    
  

8 rows × 36 columns



In [10]:

    
# Show the column names of the combined feature table.
all_data.columns









    Out[10]:





Index([u'MSSubClass', u'MSZoning', u'LotFrontage', u'LotArea', u'Street',
       u'Alley', u'LotShape', u'LandContour', u'Utilities', u'LotConfig',
       u'LandSlope', u'Neighborhood', u'Condition1', u'Condition2',
       u'BldgType', u'HouseStyle', u'OverallQual', u'OverallCond',
       u'YearBuilt', u'YearRemodAdd', u'RoofStyle', u'RoofMatl',
       u'Exterior1st', u'Exterior2nd', u'MasVnrType', u'MasVnrArea',
       u'ExterQual', u'ExterCond', u'Foundation', u'BsmtQual', u'BsmtCond',
       u'BsmtExposure', u'BsmtFinType1', u'BsmtFinSF1', u'BsmtFinType2',
       u'BsmtFinSF2', u'BsmtUnfSF', u'TotalBsmtSF', u'Heating', u'HeatingQC',
       u'CentralAir', u'Electrical', u'1stFlrSF', u'2ndFlrSF', u'LowQualFinSF',
       u'GrLivArea', u'BsmtFullBath', u'BsmtHalfBath', u'FullBath',
       u'HalfBath', u'BedroomAbvGr', u'KitchenAbvGr', u'KitchenQual',
       u'TotRmsAbvGrd', u'Functional', u'Fireplaces', u'FireplaceQu',
       u'GarageType', u'GarageYrBlt', u'GarageFinish', u'GarageCars',
       u'GarageArea', u'GarageQual', u'GarageCond', u'PavedDrive',
       u'WoodDeckSF', u'OpenPorchSF', u'EnclosedPorch', u'3SsnPorch',
       u'ScreenPorch', u'PoolArea', u'PoolQC', u'Fence', u'MiscFeature',
       u'MiscVal', u'MoSold', u'YrSold', u'SaleType', u'SaleCondition'],
      dtype='object')



In [11]:

    
# Summarise the target variable (SalePrice) of the training set.
#
# The median message is formatted into a single string before printing:
# without `from __future__ import print_function`, Python 2 parses
# `print(a, b)` as printing the tuple `(a, b)` and shows its repr
# (escaped "\n", parentheses) instead of the intended text.
print("Some housing prices statistics\n")
print(train['SalePrice'].describe())
print("\nThe median of the Housing Price is: {0}".format(train['SalePrice'].median()))









    



Some housing prices statistics

count      1460.000000
mean     180921.195890
std       79442.502883
min       34900.000000
25%      129975.000000
50%      163000.000000
75%      214000.000000
max      755000.000000
Name: SalePrice, dtype: float64
('\nThe median of the Housing Price is: ', 163000.0)

Data preprocessing:

We're not going to do anything fancy here:

First I'll transform the skewed numeric features by taking log(feature + 1); this makes their distributions closer to normal.
Create Dummy variables for the categorical features
Replace the numeric missing values (NaN's) with the mean of their respective columns



In [15]:

    
# Histograms of the raw sale price and of log(price + 1), drawn side by side.
# NOTE: the rcParams assignment changes the default figure size globally, so it
# also affects figures created by later cells.
matplotlib.rcParams['figure.figsize'] = (12.0, 6.0)
prices = pd.DataFrame({
    "price": train["SalePrice"],
    "log(price + 1)": np.log1p(train["SalePrice"]),
})
prices.hist()









    Out[15]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x117d69290>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x117e075d0>]], dtype=object)



In [24]:

    
# Histogram of the raw sale price: 30 bins, kernel density estimate disabled.
sns.distplot(
    train["SalePrice"],
    bins=30,
    kde=False,
    color='b',
    hist_kws={'alpha': 0.9},
)









    Out[24]:





<matplotlib.axes._subplots.AxesSubplot at 0x11b6e3dd0>



In [22]:

    
# Same histogram as for the raw price, but on log(price + 1).
sns.distplot(
    np.log1p(train["SalePrice"]),
    bins=30,
    kde=False,
    color='b',
    hist_kws={'alpha': 0.9},
)









    Out[22]:





<matplotlib.axes._subplots.AxesSubplot at 0x11b39c410>

Numerical Features



In [25]:

    
# Correlation matrix of the numeric (float64 / int64) training columns.
# `.iloc[:, 1:]` drops the first numeric column by position; in the training
# frame previewed earlier that column is `Id`.
corr = (
    train
    .select_dtypes(include=['float64', 'int64'])
    .iloc[:, 1:]
    .corr()
)
plt.figure(figsize=(12, 12))
sns.heatmap(corr, vmax=1, square=True)









    



//anaconda/lib/python2.7/site-packages/matplotlib/collections.py:571: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):






    Out[25]:





<matplotlib.axes._subplots.AxesSubplot at 0x11b9e5fd0>



In [28]:

    
# Rank the numeric features by the absolute value of their correlation with
# SalePrice, strongest first.
cor_dict = corr['SalePrice'].to_dict()
cor_dict.pop('SalePrice')  # drop the target's correlation with itself
print("List the numerical features decendingly by their correlation with Sale Price:\n")
for ele in sorted(cor_dict.items(), key=lambda pair: -abs(pair[1])):
    print("{0}: \t{1}".format(ele[0], ele[1]))









    



List the numerical features decendingly by their correlation with Sale Price:

OverallQual: 	0.790981600584
GrLivArea: 	0.708624477613
GarageCars: 	0.640409197258
GarageArea: 	0.623431438918
TotalBsmtSF: 	0.613580551559
1stFlrSF: 	0.605852184692
FullBath: 	0.560663762748
TotRmsAbvGrd: 	0.533723155582
YearBuilt: 	0.522897332879
YearRemodAdd: 	0.507100967111
GarageYrBlt: 	0.486361677488
MasVnrArea: 	0.477493047096
Fireplaces: 	0.466928836752
BsmtFinSF1: 	0.386419806242
LotFrontage: 	0.351799096571
WoodDeckSF: 	0.324413444568
2ndFlrSF: 	0.319333802832
OpenPorchSF: 	0.315856227116
HalfBath: 	0.284107675595
LotArea: 	0.263843353871
BsmtFullBath: 	0.227122233131
BsmtUnfSF: 	0.214479105547
BedroomAbvGr: 	0.168213154301
KitchenAbvGr: 	-0.135907370842
EnclosedPorch: 	-0.128577957926
ScreenPorch: 	0.111446571143
PoolArea: 	0.0924035494919
MSSubClass: 	-0.0842841351266
OverallCond: 	-0.0778558940487
MoSold: 	0.0464322452238
3SsnPorch: 	0.0445836653357
YrSold: 	-0.0289225851687
LowQualFinSF: 	-0.0256061300007
MiscVal: 	-0.0211895796403
BsmtHalfBath: 	-0.0168441542974
BsmtFinSF2: 	-0.0113781214502

The housing price correlates strongly with `OverallQual, GrLivArea, GarageCars, GarageArea, TotalBsmtSF, 1stFlrSF, FullBath, TotRmsAbvGrd, YearBuilt, YearRemodAdd, GarageYrBlt, MasVnrArea` and `Fireplaces`. However, some of those features are highly correlated with each other (for example `GarageCars` and `GarageArea`).



In [30]:

    
# Scatter plot of SalePrice against OverallQual with a fitted regression line.
sns.regplot(
    data=train,
    x='OverallQual',
    y='SalePrice',
    color='Orange',
)









    Out[30]:





<matplotlib.axes._subplots.AxesSubplot at 0x11bd9c150>



In [36]:

    
# 3x3 grid of scatter plots: one numeric feature per panel against SalePrice.
#
# Changes from the previous version of this cell:
#   * the stray `plt.figure(1)` is gone - it created an extra, empty figure
#     before `plt.subplots` opened the one that is actually drawn on;
#   * each panel title is the column name itself, which fixes the
#     'GrLiveArea' label (the column is `GrLivArea`);
#   * the nine copy-pasted scatter/title pairs are replaced by one loop.
f, axarr = plt.subplots(3, 3, figsize=(10, 9))
price = train.SalePrice.values  # also used by the year scatter plots in the next cell

# Panels are filled row by row, in this order.
scatter_features = [
    'GrLivArea', 'GarageArea', 'GarageCars',
    'TotalBsmtSF', '1stFlrSF', 'OverallQual',
    'TotRmsAbvGrd', 'MasVnrArea', 'YearBuilt',
]
for panel, feature in zip(axarr.ravel(), scatter_features):
    panel.scatter(train[feature].values, price)
    panel.set_title(feature)

# Shared y-axis label, placed just left of the grid.
f.text(-0.01, 0.5, 'Sale Price', va='center', rotation='vertical', fontsize=12)
plt.tight_layout()
plt.show()









    





<matplotlib.figure.Figure at 0x11d68bd90>



In [38]:

    
# Two stacked scatter plots of SalePrice against the remodel year and the
# construction year. `price` is the SalePrice array defined in an earlier cell.
fig = plt.figure(2, figsize=(9, 7))
for position, column in ((211, 'YearRemodAdd'), (212, 'YearBuilt')):
    panel = fig.add_subplot(position)
    panel.scatter(train[column].values, price)
    panel.set_title(column)

# Shared y-axis label, placed just left of the panels.
fig.text(-0.01, 0.5, 'Sale Price', va='center', rotation='vertical', fontsize=12)

plt.tight_layout()

Categorical Features



In [40]:

    
# Print the names of the object-dtype (categorical) columns of the training frame.
print(train.select_dtypes(include=['object']).columns.values)









    



['MSZoning' 'Street' 'Alley' 'LotShape' 'LandContour' 'Utilities'
 'LotConfig' 'LandSlope' 'Neighborhood' 'Condition1' 'Condition2'
 'BldgType' 'HouseStyle' 'RoofStyle' 'RoofMatl' 'Exterior1st' 'Exterior2nd'
 'MasVnrType' 'ExterQual' 'ExterCond' 'Foundation' 'BsmtQual' 'BsmtCond'
 'BsmtExposure' 'BsmtFinType1' 'BsmtFinType2' 'Heating' 'HeatingQC'
 'CentralAir' 'Electrical' 'KitchenQual' 'Functional' 'FireplaceQu'
 'GarageType' 'GarageFinish' 'GarageQual' 'GarageCond' 'PavedDrive'
 'PoolQC' 'Fence' 'MiscFeature' 'SaleType' 'SaleCondition']



In [41]:

    
# Distribution of SalePrice within each neighborhood.
plt.figure(figsize=(12, 6))
sns.boxplot(data=train, x='Neighborhood', y='SalePrice')
xt = plt.xticks(rotation=45)  # assigned so the tick objects are not echoed as cell output



In [42]:

    
# Number of training rows per neighborhood.
plt.figure(figsize=(12, 6))
sns.countplot(data=train, x='Neighborhood')
xt = plt.xticks(rotation=45)  # assigned so the tick objects are not echoed as cell output

Neighborhoods with similar housing prices could be grouped into the same bucket to reduce dimensionality.

Housing Price vs Sales

Sale Type & Condition
Sales Seasonality



In [44]:

    
# SalePrice by sale type (left panel) and by sale condition (right panel).
fig, ax = plt.subplots(1, 2, figsize=(10, 6))
for panel, column in zip(ax, ('SaleType', 'SaleCondition')):
    sns.boxplot(x=column, y='SalePrice', data=train, ax=panel)
plt.tight_layout()



In [47]:

    
# One panel per sale year: SalePrice by month sold, months 1-12 in calendar
# order, with the y-axis capped at 500000.
g = sns.FacetGrid(train, col='YrSold', col_wrap=3)
g.map(sns.boxplot, 'MoSold', 'SalePrice', palette='Set2', order=range(1, 13))
g.set(ylim=(0, 500000))
plt.tight_layout()

The timing of the sale does not seem to have a large effect on the house price.

Housing Style



In [49]:

    
# SalePrice by building type (top panel) and by house style (bottom panel).
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))
sns.boxplot(data=train, x='BldgType', y='SalePrice', ax=ax[0])
sns.boxplot(data=train, x='HouseStyle', y='SalePrice', ax=ax[1])









    Out[49]:





<matplotlib.axes._subplots.AxesSubplot at 0x11de83950>

House Condition



In [51]:

    
# SalePrice by Condition1 (top panel) and by Exterior1st (bottom panel).
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))
sns.boxplot(data=train, x='Condition1', y='SalePrice', ax=ax[0])
sns.boxplot(data=train, x='Exterior1st', y='SalePrice', ax=ax[1])
# plt.xticks acts on the current axes only - presumably the bottom panel here
# (TODO confirm); the result is assigned so it is not echoed as output.
x = plt.xticks(rotation=45)
plt.show()



In [53]:

    
# 2x2 grid of box plots: SalePrice against four basement quality features.
#
# Column names are passed as explicit x=/y= keywords, consistent with the
# other box-plot cells in this notebook; newer seaborn releases no longer
# accept them as positional arguments.
fig, ax = plt.subplots(2, 2, figsize=(10, 8))
sns.boxplot(x='BsmtCond', y='SalePrice', data=train, ax=ax[0, 0])
sns.boxplot(x='BsmtQual', y='SalePrice', data=train, ax=ax[0, 1])
sns.boxplot(x='BsmtExposure', y='SalePrice', data=train, ax=ax[1, 0])
sns.boxplot(x='BsmtFinType1', y='SalePrice', data=train, ax=ax[1, 1])









    Out[53]:





<matplotlib.axes._subplots.AxesSubplot at 0x122813190>

Home Functionality



In [56]:

    
# Distribution of SalePrice for each home-functionality rating.
# x=/y= are given as keywords (newer seaborn releases reject positional
# column names), matching the keyword style of the other plotting cells.
sns.violinplot(x='Functional', y='SalePrice', data=train)









    Out[56]:





<matplotlib.axes._subplots.AxesSubplot at 0x11e095490>

FireplaceQu



In [63]:

    
# Median SalePrice per fireplace-quality grade, ordered from best (Ex) to
# worst (Po).
# x=/y= are passed as keywords for consistency with the other plotting cells
# and because newer seaborn releases reject positional column names.
# NOTE(review): `factorplot` and its `size` argument were renamed in later
# seaborn releases (`catplot` / `height`); they are kept here so the cell still
# runs on the seaborn version this notebook was executed with.
sns.factorplot(x='FireplaceQu', y='SalePrice', data=train, color='m',
               estimator=np.median, order=['Ex', 'Gd', 'TA', 'Fa', 'Po'],
               size=4.5, aspect=1.35)









    Out[63]:





<seaborn.axisgrid.FacetGrid at 0x1238cb710>



In [65]:

    
# Count houses by number of fireplaces (rows) and fireplace quality (columns).
pd.crosstab(train.Fireplaces, train.FireplaceQu)









    Out[65]:






  
    
      FireplaceQu
      Ex
      Fa
      Gd
      Po
      TA
    
    
      Fireplaces
      
      
      
      
      
    
  
  
    
      1
      19
      28
      324
      20
      259
    
    
      2
      4
      4
      54
      0
      53
    
    
      3
      1
      1
      2
      0
      1



In [67]:

    
# One panel per fireplace-quality grade (best to worst): SalePrice by number
# of fireplaces.
g = sns.FacetGrid(
    train,
    col='FireplaceQu',
    col_wrap=3,
    col_order=['Ex', 'Gd', 'TA', 'Fa', 'Po'],
)
g.map(sns.boxplot, 'Fireplaces', 'SalePrice', order=[1, 2, 3], palette='Set2')









    Out[67]:





<seaborn.axisgrid.FacetGrid at 0x123d247d0>

Heating

Ames is a cold place in winter, so heating quality (as well as fireplace quality) is quite important.



In [69]:

    
# Count houses by heating quality (rows) and presence of central air (columns).
pd.crosstab(train.HeatingQC, train.CentralAir)



In [70]:

    
# Count houses by heating quality (rows) and fireplace quality (columns).
pd.crosstab(train.HeatingQC, train.FireplaceQu)









    Out[70]:






  
    
      FireplaceQu
      Ex
      Fa
      Gd
      Po
      TA
    
    
      HeatingQC
      
      
      
      
      
    
  
  
    
      Ex
      22
      14
      254
      4
      160
    
    
      Fa
      0
      1
      13
      1
      5
    
    
      Gd
      2
      3
      45
      5
      57
    
    
      TA
      0
      15
      68
      10
      91



In [73]:

    
# Mean SalePrice per heating-quality grade, with one line per CentralAir value.
# x=/y= are passed as keywords for consistency with the other plotting cells
# and because newer seaborn releases reject positional column names.
# NOTE(review): `factorplot` / `size` were renamed in later seaborn releases
# (`catplot` / `height`); kept as-is for the seaborn version used here.
sns.factorplot(x='HeatingQC', y='SalePrice', hue='CentralAir',
               estimator=np.mean, data=train,
               size=4.5, aspect=1.4)









    Out[73]:





<seaborn.axisgrid.FacetGrid at 0x120422d90>



In [75]:

    
# Left panel: SalePrice by electrical system (y-axis capped at 400000).
# Right panel: number of houses per electrical system.
#
# The count plot is now bound to ax[1] explicitly. Previously it had no `ax`
# argument and only ended up in the second panel through matplotlib's implicit
# "current axes" state. Column names are also passed as x=/y= keywords, as in
# the other plotting cells (newer seaborn rejects positional column names).
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
sns.boxplot(x='Electrical', y='SalePrice', data=train, ax=ax[0]).set(ylim=(0, 400000))
sns.countplot(x='Electrical', data=train, ax=ax[1])
plt.tight_layout()

Kitchen Quality



In [77]:

    
# Mean SalePrice per kitchen-quality grade, ordered from best (Ex) to worst (Fa).
# x=/y= are passed as keywords for consistency with the other plotting cells
# and because newer seaborn releases reject positional column names.
# NOTE(review): `factorplot` / `size` were renamed in later seaborn releases
# (`catplot` / `height`); kept as-is for the seaborn version used here.
sns.factorplot(x='KitchenQual', y='SalePrice', estimator=np.mean,
               size=4.5, aspect=1.4, data=train, order=['Ex', 'Gd', 'TA', 'Fa'])









    Out[77]:





<seaborn.axisgrid.FacetGrid at 0x125577e50>

MSZoning



In [79]:

    
# Distribution of SalePrice within each zoning class.
sns.boxplot(
    data=train,
    x='MSZoning',
    y='SalePrice',
)









    Out[79]:





<matplotlib.axes._subplots.AxesSubplot at 0x125d2df90>

Street & Alley Access



In [82]:

    
# SalePrice by street surface (left panel) and alley surface (right panel),
# both with the categories in the fixed order Grvl, Pave.
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
for panel, column in zip(ax, ('Street', 'Alley')):
    sns.boxplot(x=column, y='SalePrice', data=train, ax=panel, order=['Grvl', 'Pave'])
plt.tight_layout()



In [85]:

    
# Report how sparse the Alley column is and how it relates to Street.
#
# Two fixes compared with the previous version of this cell:
#   * the second message now says "not NA": the filter below is
#     `Alley.notnull()`, so the Street values listed belong to rows where an
#     alley IS recorded (the crosstab printed afterwards shows the same rows);
#   * every message is formatted into one string before printing, because
#     Python 2 without `print_function` prints `print(a, b)` as a tuple repr.
print("The NA's in Alley is: {0}".format(train['Alley'].isnull().sum()))
print("\nThere are so many NA's in Alley. When Alley is not NA, Street = {0}".format(
    train[train.Alley.notnull()].Street.unique()))
print("\n{0}".format(pd.crosstab(train.Street, train.Alley)))









    



("The NA's in Alley is: ", 1369)
("\nThere are so many NA's in Alley. When Alley is NA, Street = ", array(['Pave'], dtype=object))
('\n', Alley   Grvl  Pave
Street            
Pave      50    41)



In [ ]:

	Id	MSSubClass	MSZoning	LotFrontage	LotArea	Street	Alley	LotShape	LandContour	Utilities	...	PoolQC	Fence	MiscFeature	MoSold	YrSold	SaleType	SaleCondition	SalePrice
0	1	60	RL	65.0	8450	Pave	NaN	Reg	Lvl	AllPub	...	NaN	NaN	NaN	2	2008	WD	Normal	208500
1	2	20	RL	80.0	9600	Pave	NaN	Reg	Lvl	AllPub	...	NaN	NaN	NaN	5	2007	WD	Normal	181500
2	3	60	RL	68.0	11250	Pave	NaN	IR1	Lvl	AllPub	...	NaN	NaN	NaN	9	2008	WD	Normal	223500
3	4	70	RL	60.0	9550	Pave	NaN	IR1	Lvl	AllPub	...	NaN	NaN	NaN	2	2006	WD	Abnorml	140000
4	5	60	RL	84.0	14260	Pave	NaN	IR1	Lvl	AllPub	...	NaN	NaN	NaN	12	2008	WD	Normal	250000

	MSSubClass	LotFrontage	LotArea	OverallQual	OverallCond	YearBuilt	YearRemodAdd	MasVnrArea	BsmtFinSF1	BsmtFinSF2	...	GarageArea	WoodDeckSF	OpenPorchSF	EnclosedPorch	3SsnPorch	ScreenPorch	PoolArea	MiscVal	MoSold	YrSold
count	2919.000000	2433.000000	2919.000000	2919.000000	2919.000000	2919.000000	2919.000000	2896.000000	2918.000000	2918.000000	...	2918.000000	2919.000000	2919.000000	2919.000000	2919.000000	2919.000000	2919.000000	2919.000000	2919.000000	2919.000000
mean	57.137718	69.305795	10168.114080	6.089072	5.564577	1971.312778	1984.264474	102.201312	441.423235	49.582248	...	472.874572	93.709832	47.486811	23.098321	2.602261	16.062350	2.251799	50.825968	6.213087	2007.792737
std	42.517628	23.344905	7886.996359	1.409947	1.113131	30.291442	20.894344	179.334253	455.610826	169.205611	...	215.394815	126.526589	67.575493	64.244246	25.188169	56.184365	35.663946	567.402211	2.714762	1.314964
min	20.000000	21.000000	1300.000000	1.000000	1.000000	1872.000000	1950.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	1.000000	2006.000000
25%	20.000000	59.000000	7478.000000	5.000000	5.000000	1953.500000	1965.000000	0.000000	0.000000	0.000000	...	320.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	4.000000	2007.000000
50%	50.000000	68.000000	9453.000000	6.000000	5.000000	1973.000000	1993.000000	0.000000	368.500000	0.000000	...	480.000000	0.000000	26.000000	0.000000	0.000000	0.000000	0.000000	0.000000	6.000000	2008.000000
75%	70.000000	80.000000	11570.000000	7.000000	6.000000	2001.000000	2004.000000	164.000000	733.000000	0.000000	...	576.000000	168.000000	70.000000	0.000000	0.000000	0.000000	0.000000	0.000000	8.000000	2009.000000
max	190.000000	313.000000	215245.000000	10.000000	9.000000	2010.000000	2010.000000	1600.000000	5644.000000	1526.000000	...	1488.000000	1424.000000	742.000000	1012.000000	508.000000	576.000000	800.000000	17000.000000	12.000000	2010.000000