In [63]:
%matplotlib inline

In [129]:
import numpy as np
import pandas as pd

# 統計用ツール
import statsmodels.api as sm
import statsmodels.tsa.api as tsa
from patsy import dmatrices

# 自作の空間統計用ツール
from spatialstat import *

#描画
import matplotlib.pyplot as plt
from pandas.tools.plotting import autocorrelation_plot

空間計量モデルとOLS・最尤推定・GMM

1 CSVをpandasで取り込む。


In [67]:
# Read bukken_data.csv into a DataFrame; the path is relative to the
# notebook's current working directory.
df = pd.read_csv('bukken_data.csv')

In [68]:
# Display the column labels of the loaded frame so the regression variables
# chosen later can be checked against them.
df.columns


Out[68]:
Index(['apart_dummy', 'building_year', 'dk', 'fX', 'fY', 'floor', 'k', 'lk',
       'mansyon_dumy', 'new_dummy', 'pay', 'published_date', 'r', 'rc_dummy',
       'room_nums', 'sdk', 'sk', 'sldk', 'slk', 'south_direction_dummy',
       'square', 'teiki_syakuya_dummy', 'walk_minute_dummy'],
      dtype='object')

2 空間隣接行列を作成する。


In [118]:
# Build the spatial adjacency (weight) matrix from the data frame and wrap it
# in np.matrix, so that `*` on S means matrix multiplication.
# NOTE(review): S_matrix presumably comes from the `spatialstat` star import;
# the meaning of the arguments 10 and 0.5 is not visible here — confirm
# against its definition.
# NOTE(review): NumPy no longer recommends np.matrix; check how later cells
# use S before switching to a plain ndarray.
S = np.matrix(S_matrix(df, 10, 0.5))

3 OLS推定


In [159]:
# Columns used in the regression. The recorded summary shows 'pay' as the
# dependent variable and the remaining names as regressors, so fml_build
# presumably turns this list into a formula with the first entry as response.
# NOTE(review): `vars` shadows the Python builtin of the same name; the name is
# kept because later cells may reference it.
vars = [
    'pay', 'square', 'k', 'lk', 'dk', 'sdk', 'sldk', 'south_direction_dummy',
    'building_year', 'new_dummy', 'mansyon_dumy', 'teiki_syakuya_dummy',
    'walk_minute_dummy', 'r', 'rc_dummy', 'room_nums',
]
eq = fml_build(vars)

# Build the response and design matrices as DataFrames. The design matrix
# already carries an 'Intercept' column (it appears in the fitted summary).
y, X = dmatrices(eq, data=df, return_type='dataframe')

# The model is fitted on the natural log of the response.
logy = np.log(y)

# sm.OLS defines no `intercept` keyword: the constant term comes from the
# 'Intercept' column of X, so the stray argument is dropped.
# NOTE(review): the recorded summary flags a singular design matrix
# (Cond. No. 3.33e+18, `mansyon_dumy` reproducing the Intercept row, `lk`
# estimated at ~0). Check these columns for constant / all-zero values before
# interpreting individual coefficients.
model = sm.OLS(logy, X)
results = model.fit()
print(results.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    pay   R-squared:                       0.800
Model:                            OLS   Adj. R-squared:                  0.798
Method:                 Least Squares   F-statistic:                     454.3
Date:                Sun, 20 Nov 2016   Prob (F-statistic):               0.00
Time:                        03:19:31   Log-Likelihood:                 397.34
No. Observations:                1489   AIC:                            -766.7
Df Residuals:                    1475   BIC:                            -692.4
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
=========================================================================================
                            coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------
Intercept                 5.7449      0.018    316.295      0.000         5.709     5.780
square                    0.0199      0.000     43.492      0.000         0.019     0.021
k                        -0.1522      0.016     -9.276      0.000        -0.184    -0.120
lk                    -2.984e-15   4.25e-17    -70.152      0.000     -3.07e-15  -2.9e-15
dk                       -0.0410      0.019     -2.190      0.029        -0.078    -0.004
sdk                       0.0329      0.132      0.249      0.803        -0.227     0.293
sldk                     -0.2925      0.085     -3.436      0.001        -0.459    -0.126
south_direction_dummy    -0.0055      0.014     -0.407      0.684        -0.032     0.021
building_year            -0.0097      0.000    -21.794      0.000        -0.011    -0.009
new_dummy                -0.0140      0.010     -1.329      0.184        -0.035     0.007
mansyon_dumy              5.7449      0.018    316.295      0.000         5.709     5.780
teiki_syakuya_dummy       0.0334      0.031      1.076      0.282        -0.027     0.094
walk_minute_dummy        -0.0012      0.004     -0.293      0.769        -0.009     0.007
r                        -0.1390      0.017     -8.350      0.000        -0.172    -0.106
rc_dummy                  0.0153      0.022      0.689      0.491        -0.028     0.059
room_nums                -0.0304      0.015     -2.035      0.042        -0.060    -0.001
==============================================================================
Omnibus:                      873.427   Durbin-Watson:                   1.487
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           984703.367
Skew:                           1.211   Prob(JB):                         0.00
Kurtosis:                     128.959   Cond. No.                     3.33e+18
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 2.5e-31. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

なお、空間誤差モデル（SEM）を想定した場合でも、OLSやGLSによる係数推定量は空間相関の有無に関わらず不偏である。したがって、点予測（汎化性能）にはあまり関係ない。ただし、誤差項に空間相関があるとOLSの標準誤差は正しく推定されないため、t値や信頼区間の解釈には注意が必要である。