In [10]:
%matplotlib inline

In [11]:
import numpy as np
import pandas as pd

# 統計用ツール
import statsmodels.api as sm
import statsmodels.tsa.api as tsa
from patsy import dmatrices

# 自作の空間統計用ツール
from spatialstat import *

#描画
import matplotlib.pyplot as plt
from pandas.tools.plotting import autocorrelation_plot

OLS

1 CSVをpandasで取り込む。


In [35]:
df = pd.read_csv('bukken_data.csv')

In [24]:
df = df[:][df['pay'] < 300000]

In [48]:
df.columns


Out[48]:
Index(['apart_dummy', 'building_year', 'dk', 'fX', 'fY', 'floor', 'k', 'lk',
       'mansyon_dumy', 'new_dummy', 'pay', 'published_date', 'r', 'rc_dummy',
       'room_nums', 'sdk', 'sk', 'sldk', 'slk', 'south_direction_dummy',
       'square', 'teiki_syakuya_dummy', 'walk_minute_dummy'],
      dtype='object')

2 OLS推定

リストから回帰式を生成する関数。spatialstat内にあるのでimportすれば使える。


In [49]:
def fml_build(varlst):
    """
    Binding OLS formula from a list of variable names
    varlst: variable names, the 1st var should be endogeneouse variable
    """
    varlst.reverse()
    fml=varlst.pop()+'~'
    while len(varlst) != 0:
        fml=fml+'+'+varlst.pop()
    return fml

In [50]:
vars = ['pay', 'square', 'k', 'lk', 'dk', 'sdk', 'sldk', 'south_direction_dummy', 'building_year', 
        'new_dummy', 'mansyon_dumy', 'teiki_syakuya_dummy', 'walk_minute_dummy', 'r', 'rc_dummy', 'room_nums']
eq = fml_build(vars)

y, X = dmatrices(eq, data=df, return_type='dataframe')

logy = np.log(y)

model = sm.OLS(logy, X, intercept=True)
results = model.fit()
print(results.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    pay   R-squared:                       0.807
Model:                            OLS   Adj. R-squared:                  0.805
Method:                 Least Squares   F-statistic:                     453.8
Date:                Sun, 20 Nov 2016   Prob (F-statistic):               0.00
Time:                        18:05:56   Log-Likelihood:                 660.63
No. Observations:                1427   AIC:                            -1293.
Df Residuals:                    1413   BIC:                            -1220.
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
=========================================================================================
                            coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------
Intercept                 5.6474      0.017    328.289      0.000         5.614     5.681
square                    0.0237      0.001     39.271      0.000         0.023     0.025
k                        -0.0600      0.016     -3.660      0.000        -0.092    -0.028
lk                    -1.674e-14   6.12e-16    -27.375      0.000     -1.79e-14 -1.55e-14
dk                        0.0173      0.016      1.068      0.286        -0.014     0.049
sdk                      -0.0807      0.154     -0.525      0.600        -0.382     0.221
sldk                     -0.2233      0.110     -2.039      0.042        -0.438    -0.009
south_direction_dummy    -0.0134      0.012     -1.166      0.244        -0.036     0.009
building_year            -0.0092      0.000    -23.890      0.000        -0.010    -0.008
new_dummy                -0.0173      0.009     -1.966      0.049        -0.034 -4.25e-05
mansyon_dumy              5.6474      0.017    328.289      0.000         5.614     5.681
teiki_syakuya_dummy       0.0140      0.027      0.511      0.610        -0.040     0.068
walk_minute_dummy        -0.0023      0.003     -0.681      0.496        -0.009     0.004
r                        -0.0584      0.016     -3.546      0.000        -0.091    -0.026
rc_dummy                  0.0557      0.018      3.015      0.003         0.019     0.092
room_nums                -0.0651      0.014     -4.649      0.000        -0.093    -0.038
==============================================================================
Omnibus:                     1699.589   Durbin-Watson:                   1.411
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           553540.298
Skew:                          -5.637   Prob(JB):                         0.00
Kurtosis:                      98.826   Cond. No.                     2.60e+17
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 3.51e-29. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

なお空間統計でSEMを考えても、OLSやGLSは空間相関の有無に関わらず不偏であり、したがって汎化にはあまり関係ない。

時系列の系列相関用だが一応。空間相関の検出法については後述


In [51]:
error = results.resid

# ADF test, H0: Non-stationary
tsa.adfuller(error, regression='nc')
 
# Autocorrel plot of resid
autocorrelation_plot(error) # Show ACF of residuals
ACF_resid=tsa.acf(error) # Keep ACF of residuals


OLSによる予測値。


In [52]:
results.predict()


Out[52]:
array([ 11.63971598,  11.7000213 ,  11.71728911, ...,  11.55881281,
        11.73382293,  11.74532396])

In [53]:
pred = results.predict()

evl = df['pay'] - np.exp(pred)

In [54]:
evl


Out[54]:
0        62482.080791
1        54425.717410
2        48325.582684
3       101934.754258
4        55432.534833
5        39425.717410
6       -10156.891428
7        11462.663055
8        -8926.192885
9       -17651.755132
10      -10680.469281
11      -12594.697488
12      -13294.822135
13      -13987.290275
14      -25775.089333
15      -13735.464761
16      -17188.044769
17      -12188.044769
18      -20223.518958
19      -15935.664751
20      -14663.023631
21      -13294.822135
22     -154644.473361
23        5340.019089
24        7340.019089
25        7511.678468
26     -124644.473361
27        -517.793654
28      -10585.774268
29        4506.845487
            ...      
1457     36053.773613
1458     26394.178831
1459    -17298.831612
1460     -4952.416268
1461      5401.338769
1462     19793.774034
1463     -8261.650060
1464    -52043.710532
1465     -1434.092000
1466    -32610.336058
1467    -27755.692080
1468    -28279.688984
1469     -6954.630638
1470      1796.550823
1471    -13345.684993
1472      8206.673786
1473      8231.613088
1476     -7455.221856
1477     13492.864092
1478      6053.306537
1479      5903.517038
1480     10516.324421
1481     10516.324421
1482       588.418309
1483     35476.525660
1484      5798.573517
1485     -1468.647337
1486      5304.354441
1487    -24719.553716
1488      3837.761828
Name: pay, dtype: float64

In [55]:
evl.hist(figsize=(7,5), bins=50)


Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x11910b588>

In [56]:
print(evl.mean())
print(evl.std())


1098.8234646571307
20932.887025534183

In [39]:
a = df['pay'][df['pay'] > 4000000]

In [40]:
a.count()


Out[40]:
0

In [45]:
df[:][df['pay'] < 300000]


Out[45]:
apart_dummy building_year dk fX fY floor k lk mansyon_dumy new_dummy ... rc_dummy room_nums sdk sk sldk slk south_direction_dummy square teiki_syakuya_dummy walk_minute_dummy
0 0 42 0 35.655681 139.733959 3 1 0 1 0 ... 0 3 0 0 0 0 0 41.81 0 3
1 0 42 1 35.655681 139.733959 3 0 0 1 1 ... 0 3 0 0 0 0 0 41.82 0 3
2 0 42 1 35.655681 139.733959 3 0 0 1 0 ... 0 3 0 0 0 0 0 41.82 0 3
3 0 42 0 35.655681 139.733959 2 0 0 1 1 ... 0 1 0 0 0 0 0 33.00 0 3
4 0 42 0 35.655681 139.733959 3 0 0 1 0 ... 0 3 0 0 0 0 0 41.82 0 3
5 0 42 1 35.655681 139.733959 3 0 0 1 1 ... 0 3 0 0 0 0 0 41.82 0 3
6 0 39 0 35.656972 139.732992 7 0 0 1 1 ... 1 2 0 0 0 0 1 53.69 0 2
7 0 39 0 35.656972 139.732992 2 0 0 1 1 ... 1 1 0 0 0 0 1 48.14 0 2
8 0 39 0 35.656972 139.732992 4 0 0 1 1 ... 1 1 0 0 0 0 1 40.00 0 2
9 0 39 0 35.656972 139.732992 5 0 0 1 1 ... 1 1 0 0 0 0 0 42.00 0 2
10 0 39 0 35.656972 139.732992 5 0 0 1 1 ... 1 1 0 0 0 0 1 42.00 0 2
11 0 39 0 35.656972 139.732992 7 0 0 1 1 ... 1 2 0 0 0 0 0 53.69 0 2
12 0 39 0 35.656972 139.732992 6 0 0 1 0 ... 1 2 0 0 0 0 1 53.69 0 2
13 0 39 0 35.656972 139.732992 7 0 0 1 0 ... 1 1 0 0 0 0 1 40.78 0 2
14 0 39 0 35.656972 139.732992 3 0 0 1 0 ... 1 2 0 0 0 0 0 53.69 0 2
15 0 39 0 35.656972 139.732992 5 0 0 1 0 ... 1 1 0 0 0 0 0 40.14 0 2
16 0 39 0 35.656972 139.732992 8 0 0 1 0 ... 1 1 0 0 0 0 0 41.14 0 2
17 0 39 0 35.656972 139.732992 6 0 0 1 0 ... 1 1 0 0 0 0 0 41.14 0 2
18 0 39 0 35.656972 139.732992 5 0 0 1 0 ... 1 1 0 0 0 0 0 42.00 0 2
19 0 39 0 35.656972 139.732992 7 0 0 1 0 ... 1 1 0 0 0 0 0 40.78 0 2
20 0 39 0 35.656972 139.732992 6 0 0 1 1 ... 1 1 0 0 0 0 0 42.57 0 2
21 0 39 0 35.656972 139.732992 7 0 0 1 0 ... 1 2 0 0 0 0 1 53.69 0 2
22 0 24 0 35.653603 139.734767 3 0 0 1 1 ... 1 3 0 0 0 0 0 82.02 0 2
23 0 24 1 35.653603 139.734767 4 0 0 1 1 ... 1 2 0 0 0 0 0 42.29 0 2
24 0 24 1 35.653603 139.734767 5 0 0 1 1 ... 1 2 0 0 0 0 0 42.29 0 2
25 0 24 1 35.653603 139.734767 4 0 0 1 1 ... 1 2 0 0 0 0 1 42.29 0 2
26 0 24 0 35.653603 139.734767 5 0 0 1 1 ... 1 3 0 0 0 0 0 82.02 0 2
27 0 24 0 35.653603 139.734767 4 0 0 1 0 ... 1 1 0 0 0 0 0 42.03 0 2
28 0 24 0 35.653603 139.734767 3 0 0 1 0 ... 1 1 0 0 0 0 0 42.29 0 2
29 0 24 1 35.653603 139.734767 8 0 0 1 0 ... 1 2 0 0 0 0 0 42.29 0 2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1457 0 42 1 35.656759 139.734067 5 0 0 1 1 ... 1 2 0 0 0 0 0 40.97 0 1
1458 0 39 1 35.657000 139.734742 6 0 0 1 1 ... 1 1 0 0 0 0 0 30.13 0 1
1459 0 44 1 35.654370 139.734673 3 0 0 1 1 ... 1 1 0 0 0 0 0 29.36 1 4
1460 0 43 0 35.655014 139.734973 4 0 0 1 1 ... 1 1 0 0 0 0 0 33.00 0 2
1461 0 43 0 35.655014 139.734973 5 1 0 1 1 ... 1 1 0 0 0 0 0 26.73 1 2
1462 0 5 0 35.654870 139.736117 5 1 0 1 1 ... 1 1 0 0 0 0 0 28.50 0 1
1463 0 10 0 35.653853 139.736578 5 1 0 1 0 ... 1 1 0 0 0 0 0 23.14 0 1
1464 0 35 0 35.656792 139.733751 4 0 0 1 1 ... 1 2 0 0 0 0 0 62.24 0 2
1465 0 35 1 35.654976 139.736009 6 0 0 1 1 ... 1 1 0 0 0 0 0 34.50 0 2
1466 0 0 0 35.654903 139.737273 9 0 0 1 2 ... 1 1 0 0 0 0 0 51.00 0 1
1467 0 0 0 35.654903 139.737273 11 0 0 1 2 ... 1 1 0 0 0 0 0 50.86 0 1
1468 0 0 0 35.654903 139.737273 5 0 0 1 2 ... 1 1 0 0 0 0 0 47.18 0 1
1469 0 43 0 35.655014 139.734973 4 0 0 1 0 ... 1 1 0 0 0 0 0 33.00 0 2
1470 0 15 0 35.652809 139.736306 8 0 0 1 1 ... 1 1 0 0 0 0 0 21.30 0 2
1471 0 35 1 35.656792 139.733751 2 0 0 1 1 ... 1 1 0 0 0 0 0 33.82 0 2
1472 0 30 1 35.652856 139.734556 5 0 0 1 1 ... 1 1 0 0 0 0 0 39.15 0 4
1473 0 9 0 35.652740 139.735306 6 0 0 1 1 ... 1 1 0 0 0 0 0 41.07 0 3
1476 0 13 0 35.654092 139.735501 7 1 0 1 0 ... 1 1 0 0 0 0 0 20.36 0 2
1477 0 17 0 35.652817 139.735450 4 0 0 1 0 ... 1 1 0 0 1 0 0 61.22 0 3
1478 0 43 0 35.655014 139.734973 5 1 0 1 1 ... 1 1 0 0 0 0 0 26.73 1 5
1479 0 43 0 35.655014 139.734973 5 0 0 1 1 ... 1 1 0 0 0 0 0 26.73 1 5
1480 0 45 0 35.652931 139.736392 8 0 0 1 1 ... 1 1 0 0 0 0 0 18.05 0 3
1481 0 45 0 35.652931 139.736392 8 0 0 1 1 ... 1 1 0 0 0 0 0 18.05 0 3
1482 0 31 0 35.654365 139.736125 2 1 0 1 1 ... 1 1 0 0 0 0 0 28.03 0 2
1483 0 11 0 35.654015 139.736473 14 0 0 1 1 ... 1 1 0 0 0 0 0 50.57 0 1
1484 0 32 0 35.657228 139.732803 2 0 0 1 1 ... 1 1 0 0 0 0 0 18.19 0 3
1485 0 9 0 35.654642 139.735175 5 0 0 1 1 ... 1 1 0 0 0 0 0 52.93 0 3
1486 0 34 1 35.654426 139.734481 6 0 0 1 1 ... 1 1 0 0 0 0 0 24.95 0 3
1487 0 7 0 35.652809 139.736306 10 0 0 1 0 ... 1 1 0 0 0 0 0 24.25 0 2
1488 0 35 1 35.654970 139.736503 3 0 0 1 1 ... 1 1 0 0 0 0 0 33.00 0 1

1427 rows × 23 columns


In [130]:
df_by = pd.read_csv('bukken_data.csv')

In [131]:
df_by['building_year'][(df_by['building_year'] > 30)] = 1
df_by['building_year'][(df_by['building_year'] <= 30)] = 2
df_by['building_year'][(df_by['building_year'] <= 20)] = 3
df_by['building_year'][(df_by['building_year'] <= 15)] = 4
df_by['building_year'][(df_by['building_year'] <= 10)] = 5
df_by['building_year'][(df_by['building_year'] <= 5)] = 6


/Users/NIGG/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
/Users/NIGG/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/Users/NIGG/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
/Users/NIGG/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/Users/NIGG/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/Users/NIGG/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [132]:
by = pd.get_dummies(df_by['building_year'])

In [133]:
varname = []
for i in by.columns:
    for var in ['square', 'k', 'lk', 'dk', 'sdk', 'sldk', 'south_direction_dummy', 'mansyon_dumy', 'walk_minute_dummy', 'r', 'rc_dummy', 'room_nums']:
        df_by['{var}{i}'.format(**locals())] = by[i]*df_by[var]
        varname.append('{var}{i}'.format(**locals()))

In [134]:
vars = ['pay', 'square', 'k', 'lk', 'dk', 'sdk', 'sldk', 'south_direction_dummy', 'building_year', 
        'new_dummy', 'mansyon_dumy', 'teiki_syakuya_dummy', 'walk_minute_dummy', 'r', 'rc_dummy', 'room_nums'] + varname
eq = fml_build(vars)

y, X = dmatrices(eq, data=df_by, return_type='dataframe')

logy = np.log(y)

model = sm.OLS(logy, X, intercept=True)
results = model.fit()
print(results.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    pay   R-squared:                       0.736
Model:                            OLS   Adj. R-squared:                  0.734
Method:                 Least Squares   F-statistic:                     342.6
Date:                Wed, 23 Nov 2016   Prob (F-statistic):               0.00
Time:                        20:46:13   Log-Likelihood:                 189.50
No. Observations:                1489   AIC:                            -353.0
Df Residuals:                    1476   BIC:                            -284.0
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
==========================================================================================
                             coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
Intercept                  0.2906      0.001    276.959      0.000         0.289     0.293
square                     0.0107      0.000     41.292      0.000         0.010     0.011
k                         -0.0384      0.009     -4.169      0.000        -0.057    -0.020
lk                     -2.943e-17   8.94e-18     -3.294      0.001      -4.7e-17 -1.19e-17
dk                        -0.0319      0.011     -2.971      0.003        -0.053    -0.011
sdk                        0.0494      0.076      0.650      0.516        -0.100     0.199
sldk                      -0.1567      0.049     -3.203      0.001        -0.253    -0.061
south_direction_dummy     -0.0175      0.008     -2.250      0.025        -0.033    -0.002
building_year              1.7434      0.006    276.959      0.000         1.731     1.756
new_dummy                 -0.0182      0.012     -1.509      0.131        -0.042     0.005
mansyon_dumy               0.2906      0.001    276.959      0.000         0.289     0.293
teiki_syakuya_dummy        0.0453      0.036      1.271      0.204        -0.025     0.115
walk_minute_dummy         -0.0016      0.002     -0.703      0.482        -0.006     0.003
r                         -0.0584      0.010     -6.115      0.000        -0.077    -0.040
rc_dummy                  -0.0065      0.013     -0.508      0.611        -0.031     0.019
room_nums                 -0.0545      0.008     -6.534      0.000        -0.071    -0.038
square6                    0.0107      0.000     41.292      0.000         0.010     0.011
k6                        -0.0384      0.009     -4.169      0.000        -0.057    -0.020
lk6                             0          0        nan        nan             0         0
dk6                       -0.0319      0.011     -2.971      0.003        -0.053    -0.011
sdk6                       0.0494      0.076      0.650      0.516        -0.100     0.199
sldk6                     -0.1567      0.049     -3.203      0.001        -0.253    -0.061
south_direction_dummy6    -0.0175      0.008     -2.250      0.025        -0.033    -0.002
mansyon_dumy6              0.2906      0.001    276.959      0.000         0.289     0.293
walk_minute_dummy6        -0.0016      0.002     -0.703      0.482        -0.006     0.003
r6                        -0.0584      0.010     -6.115      0.000        -0.077    -0.040
rc_dummy6                 -0.0065      0.013     -0.508      0.611        -0.031     0.019
room_nums6                -0.0545      0.008     -6.534      0.000        -0.071    -0.038
==============================================================================
Omnibus:                      537.549   Durbin-Watson:                   1.208
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           206062.773
Skew:                           0.044   Prob(JB):                         0.00
Kurtosis:                      60.631   Cond. No.                     2.10e+16
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 9.88e-27. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

In [102]:
varname


Out[102]:
['square 0',
 'k 0',
 'lk 0',
 'dk 0',
 'sdk 0',
 'sldk 0',
 'south_direction_dummy 0',
 'new_dummy 0',
 'mansyon_dumy 0',
 'teiki_syakuya_dummy 0',
 'walk_minute_dummy 0',
 'r 0',
 'rc_dummy 0',
 'room_nums 0',
 'square 2',
 'k 2',
 'lk 2',
 'dk 2',
 'sdk 2',
 'sldk 2',
 'south_direction_dummy 2',
 'new_dummy 2',
 'mansyon_dumy 2',
 'teiki_syakuya_dummy 2',
 'walk_minute_dummy 2',
 'r 2',
 'rc_dummy 2',
 'room_nums 2',
 'square 3',
 'k 3',
 'lk 3',
 'dk 3',
 'sdk 3',
 'sldk 3',
 'south_direction_dummy 3',
 'new_dummy 3',
 'mansyon_dumy 3',
 'teiki_syakuya_dummy 3',
 'walk_minute_dummy 3',
 'r 3',
 'rc_dummy 3',
 'room_nums 3',
 'square 5',
 'k 5',
 'lk 5',
 'dk 5',
 'sdk 5',
 'sldk 5',
 'south_direction_dummy 5',
 'new_dummy 5',
 'mansyon_dumy 5',
 'teiki_syakuya_dummy 5',
 'walk_minute_dummy 5',
 'r 5',
 'rc_dummy 5',
 'room_nums 5',
 'square 6',
 'k 6',
 'lk 6',
 'dk 6',
 'sdk 6',
 'sldk 6',
 'south_direction_dummy 6',
 'new_dummy 6',
 'mansyon_dumy 6',
 'teiki_syakuya_dummy 6',
 'walk_minute_dummy 6',
 'r 6',
 'rc_dummy 6',
 'room_nums 6',
 'square 7',
 'k 7',
 'lk 7',
 'dk 7',
 'sdk 7',
 'sldk 7',
 'south_direction_dummy 7',
 'new_dummy 7',
 'mansyon_dumy 7',
 'teiki_syakuya_dummy 7',
 'walk_minute_dummy 7',
 'r 7',
 'rc_dummy 7',
 'room_nums 7',
 'square 8',
 'k 8',
 'lk 8',
 'dk 8',
 'sdk 8',
 'sldk 8',
 'south_direction_dummy 8',
 'new_dummy 8',
 'mansyon_dumy 8',
 'teiki_syakuya_dummy 8',
 'walk_minute_dummy 8',
 'r 8',
 'rc_dummy 8',
 'room_nums 8',
 'square 9',
 'k 9',
 'lk 9',
 'dk 9',
 'sdk 9',
 'sldk 9',
 'south_direction_dummy 9',
 'new_dummy 9',
 'mansyon_dumy 9',
 'teiki_syakuya_dummy 9',
 'walk_minute_dummy 9',
 'r 9',
 'rc_dummy 9',
 'room_nums 9',
 'square 10',
 'k 10',
 'lk 10',
 'dk 10',
 'sdk 10',
 'sldk 10',
 'south_direction_dummy 10',
 'new_dummy 10',
 'mansyon_dumy 10',
 'teiki_syakuya_dummy 10',
 'walk_minute_dummy 10',
 'r 10',
 'rc_dummy 10',
 'room_nums 10',
 'square 11',
 'k 11',
 'lk 11',
 'dk 11',
 'sdk 11',
 'sldk 11',
 'south_direction_dummy 11',
 'new_dummy 11',
 'mansyon_dumy 11',
 'teiki_syakuya_dummy 11',
 'walk_minute_dummy 11',
 'r 11',
 'rc_dummy 11',
 'room_nums 11',
 'square 12',
 'k 12',
 'lk 12',
 'dk 12',
 'sdk 12',
 'sldk 12',
 'south_direction_dummy 12',
 'new_dummy 12',
 'mansyon_dumy 12',
 'teiki_syakuya_dummy 12',
 'walk_minute_dummy 12',
 'r 12',
 'rc_dummy 12',
 'room_nums 12',
 'square 13',
 'k 13',
 'lk 13',
 'dk 13',
 'sdk 13',
 'sldk 13',
 'south_direction_dummy 13',
 'new_dummy 13',
 'mansyon_dumy 13',
 'teiki_syakuya_dummy 13',
 'walk_minute_dummy 13',
 'r 13',
 'rc_dummy 13',
 'room_nums 13',
 'square 14',
 'k 14',
 'lk 14',
 'dk 14',
 'sdk 14',
 'sldk 14',
 'south_direction_dummy 14',
 'new_dummy 14',
 'mansyon_dumy 14',
 'teiki_syakuya_dummy 14',
 'walk_minute_dummy 14',
 'r 14',
 'rc_dummy 14',
 'room_nums 14',
 'square 15',
 'k 15',
 'lk 15',
 'dk 15',
 'sdk 15',
 'sldk 15',
 'south_direction_dummy 15',
 'new_dummy 15',
 'mansyon_dumy 15',
 'teiki_syakuya_dummy 15',
 'walk_minute_dummy 15',
 'r 15',
 'rc_dummy 15',
 'room_nums 15',
 'square 16',
 'k 16',
 'lk 16',
 'dk 16',
 'sdk 16',
 'sldk 16',
 'south_direction_dummy 16',
 'new_dummy 16',
 'mansyon_dumy 16',
 'teiki_syakuya_dummy 16',
 'walk_minute_dummy 16',
 'r 16',
 'rc_dummy 16',
 'room_nums 16',
 'square 17',
 'k 17',
 'lk 17',
 'dk 17',
 'sdk 17',
 'sldk 17',
 'south_direction_dummy 17',
 'new_dummy 17',
 'mansyon_dumy 17',
 'teiki_syakuya_dummy 17',
 'walk_minute_dummy 17',
 'r 17',
 'rc_dummy 17',
 'room_nums 17',
 'square 18',
 'k 18',
 'lk 18',
 'dk 18',
 'sdk 18',
 'sldk 18',
 'south_direction_dummy 18',
 'new_dummy 18',
 'mansyon_dumy 18',
 'teiki_syakuya_dummy 18',
 'walk_minute_dummy 18',
 'r 18',
 'rc_dummy 18',
 'room_nums 18',
 'square 19',
 'k 19',
 'lk 19',
 'dk 19',
 'sdk 19',
 'sldk 19',
 'south_direction_dummy 19',
 'new_dummy 19',
 'mansyon_dumy 19',
 'teiki_syakuya_dummy 19',
 'walk_minute_dummy 19',
 'r 19',
 'rc_dummy 19',
 'room_nums 19',
 'square 20',
 'k 20',
 'lk 20',
 'dk 20',
 'sdk 20',
 'sldk 20',
 'south_direction_dummy 20',
 'new_dummy 20',
 'mansyon_dumy 20',
 'teiki_syakuya_dummy 20',
 'walk_minute_dummy 20',
 'r 20',
 'rc_dummy 20',
 'room_nums 20',
 'square 21',
 'k 21',
 'lk 21',
 'dk 21',
 'sdk 21',
 'sldk 21',
 'south_direction_dummy 21',
 'new_dummy 21',
 'mansyon_dumy 21',
 'teiki_syakuya_dummy 21',
 'walk_minute_dummy 21',
 'r 21',
 'rc_dummy 21',
 'room_nums 21',
 'square 23',
 'k 23',
 'lk 23',
 'dk 23',
 'sdk 23',
 'sldk 23',
 'south_direction_dummy 23',
 'new_dummy 23',
 'mansyon_dumy 23',
 'teiki_syakuya_dummy 23',
 'walk_minute_dummy 23',
 'r 23',
 'rc_dummy 23',
 'room_nums 23',
 'square 24',
 'k 24',
 'lk 24',
 'dk 24',
 'sdk 24',
 'sldk 24',
 'south_direction_dummy 24',
 'new_dummy 24',
 'mansyon_dumy 24',
 'teiki_syakuya_dummy 24',
 'walk_minute_dummy 24',
 'r 24',
 'rc_dummy 24',
 'room_nums 24',
 'square 25',
 'k 25',
 'lk 25',
 'dk 25',
 'sdk 25',
 'sldk 25',
 'south_direction_dummy 25',
 'new_dummy 25',
 'mansyon_dumy 25',
 'teiki_syakuya_dummy 25',
 'walk_minute_dummy 25',
 'r 25',
 'rc_dummy 25',
 'room_nums 25',
 'square 26',
 'k 26',
 'lk 26',
 'dk 26',
 'sdk 26',
 'sldk 26',
 'south_direction_dummy 26',
 'new_dummy 26',
 'mansyon_dumy 26',
 'teiki_syakuya_dummy 26',
 'walk_minute_dummy 26',
 'r 26',
 'rc_dummy 26',
 'room_nums 26',
 'square 27',
 'k 27',
 'lk 27',
 'dk 27',
 'sdk 27',
 'sldk 27',
 'south_direction_dummy 27',
 'new_dummy 27',
 'mansyon_dumy 27',
 'teiki_syakuya_dummy 27',
 'walk_minute_dummy 27',
 'r 27',
 'rc_dummy 27',
 'room_nums 27',
 'square 28',
 'k 28',
 'lk 28',
 'dk 28',
 'sdk 28',
 'sldk 28',
 'south_direction_dummy 28',
 'new_dummy 28',
 'mansyon_dumy 28',
 'teiki_syakuya_dummy 28',
 'walk_minute_dummy 28',
 'r 28',
 'rc_dummy 28',
 'room_nums 28',
 'square 30',
 'k 30',
 'lk 30',
 'dk 30',
 'sdk 30',
 'sldk 30',
 'south_direction_dummy 30',
 'new_dummy 30',
 'mansyon_dumy 30',
 'teiki_syakuya_dummy 30',
 'walk_minute_dummy 30',
 'r 30',
 'rc_dummy 30',
 'room_nums 30']

In [104]:
vars = ['pay', 'square', 'k', 'lk', 'dk', 'sdk', 'sldk', 'south_direction_dummy', 'building_year', 
        'new_dummy', 'mansyon_dumy', 'teiki_syakuya_dummy', 'walk_minute_dummy', 'r', 'rc_dummy', 'room_nums'] + varname
eq = fml_build(vars)

In [105]:
eq


Out[105]:
'pay~+square+k+lk+dk+sdk+sldk+south_direction_dummy+building_year+new_dummy+mansyon_dumy+teiki_syakuya_dummy+walk_minute_dummy+r+rc_dummy+room_nums+square 0+k 0+lk 0+dk 0+sdk 0+sldk 0+south_direction_dummy 0+new_dummy 0+mansyon_dumy 0+teiki_syakuya_dummy 0+walk_minute_dummy 0+r 0+rc_dummy 0+room_nums 0+square 2+k 2+lk 2+dk 2+sdk 2+sldk 2+south_direction_dummy 2+new_dummy 2+mansyon_dumy 2+teiki_syakuya_dummy 2+walk_minute_dummy 2+r 2+rc_dummy 2+room_nums 2+square 3+k 3+lk 3+dk 3+sdk 3+sldk 3+south_direction_dummy 3+new_dummy 3+mansyon_dumy 3+teiki_syakuya_dummy 3+walk_minute_dummy 3+r 3+rc_dummy 3+room_nums 3+square 5+k 5+lk 5+dk 5+sdk 5+sldk 5+south_direction_dummy 5+new_dummy 5+mansyon_dumy 5+teiki_syakuya_dummy 5+walk_minute_dummy 5+r 5+rc_dummy 5+room_nums 5+square 6+k 6+lk 6+dk 6+sdk 6+sldk 6+south_direction_dummy 6+new_dummy 6+mansyon_dumy 6+teiki_syakuya_dummy 6+walk_minute_dummy 6+r 6+rc_dummy 6+room_nums 6+square 7+k 7+lk 7+dk 7+sdk 7+sldk 7+south_direction_dummy 7+new_dummy 7+mansyon_dumy 7+teiki_syakuya_dummy 7+walk_minute_dummy 7+r 7+rc_dummy 7+room_nums 7+square 8+k 8+lk 8+dk 8+sdk 8+sldk 8+south_direction_dummy 8+new_dummy 8+mansyon_dumy 8+teiki_syakuya_dummy 8+walk_minute_dummy 8+r 8+rc_dummy 8+room_nums 8+square 9+k 9+lk 9+dk 9+sdk 9+sldk 9+south_direction_dummy 9+new_dummy 9+mansyon_dumy 9+teiki_syakuya_dummy 9+walk_minute_dummy 9+r 9+rc_dummy 9+room_nums 9+square 10+k 10+lk 10+dk 10+sdk 10+sldk 10+south_direction_dummy 10+new_dummy 10+mansyon_dumy 10+teiki_syakuya_dummy 10+walk_minute_dummy 10+r 10+rc_dummy 10+room_nums 10+square 11+k 11+lk 11+dk 11+sdk 11+sldk 11+south_direction_dummy 11+new_dummy 11+mansyon_dumy 11+teiki_syakuya_dummy 11+walk_minute_dummy 11+r 11+rc_dummy 11+room_nums 11+square 12+k 12+lk 12+dk 12+sdk 12+sldk 12+south_direction_dummy 12+new_dummy 12+mansyon_dumy 12+teiki_syakuya_dummy 12+walk_minute_dummy 12+r 12+rc_dummy 12+room_nums 12+square 13+k 13+lk 13+dk 13+sdk 13+sldk 13+south_direction_dummy 13+new_dummy 13+mansyon_dumy 13+teiki_syakuya_dummy 13+walk_minute_dummy 13+r 13+rc_dummy 13+room_nums 13+square 14+k 14+lk 14+dk 14+sdk 14+sldk 14+south_direction_dummy 14+new_dummy 14+mansyon_dumy 14+teiki_syakuya_dummy 14+walk_minute_dummy 14+r 14+rc_dummy 14+room_nums 14+square 15+k 15+lk 15+dk 15+sdk 15+sldk 15+south_direction_dummy 15+new_dummy 15+mansyon_dumy 15+teiki_syakuya_dummy 15+walk_minute_dummy 15+r 15+rc_dummy 15+room_nums 15+square 16+k 16+lk 16+dk 16+sdk 16+sldk 16+south_direction_dummy 16+new_dummy 16+mansyon_dumy 16+teiki_syakuya_dummy 16+walk_minute_dummy 16+r 16+rc_dummy 16+room_nums 16+square 17+k 17+lk 17+dk 17+sdk 17+sldk 17+south_direction_dummy 17+new_dummy 17+mansyon_dumy 17+teiki_syakuya_dummy 17+walk_minute_dummy 17+r 17+rc_dummy 17+room_nums 17+square 18+k 18+lk 18+dk 18+sdk 18+sldk 18+south_direction_dummy 18+new_dummy 18+mansyon_dumy 18+teiki_syakuya_dummy 18+walk_minute_dummy 18+r 18+rc_dummy 18+room_nums 18+square 19+k 19+lk 19+dk 19+sdk 19+sldk 19+south_direction_dummy 19+new_dummy 19+mansyon_dumy 19+teiki_syakuya_dummy 19+walk_minute_dummy 19+r 19+rc_dummy 19+room_nums 19+square 20+k 20+lk 20+dk 20+sdk 20+sldk 20+south_direction_dummy 20+new_dummy 20+mansyon_dumy 20+teiki_syakuya_dummy 20+walk_minute_dummy 20+r 20+rc_dummy 20+room_nums 20+square 21+k 21+lk 21+dk 21+sdk 21+sldk 21+south_direction_dummy 21+new_dummy 21+mansyon_dumy 21+teiki_syakuya_dummy 21+walk_minute_dummy 21+r 21+rc_dummy 21+room_nums 21+square 23+k 23+lk 23+dk 23+sdk 23+sldk 23+south_direction_dummy 23+new_dummy 23+mansyon_dumy 23+teiki_syakuya_dummy 23+walk_minute_dummy 23+r 23+rc_dummy 23+room_nums 23+square 24+k 24+lk 24+dk 24+sdk 24+sldk 24+south_direction_dummy 24+new_dummy 24+mansyon_dumy 24+teiki_syakuya_dummy 24+walk_minute_dummy 24+r 24+rc_dummy 24+room_nums 24+square 25+k 25+lk 25+dk 25+sdk 25+sldk 25+south_direction_dummy 25+new_dummy 25+mansyon_dumy 25+teiki_syakuya_dummy 25+walk_minute_dummy 25+r 25+rc_dummy 25+room_nums 25+square 26+k 26+lk 26+dk 26+sdk 26+sldk 26+south_direction_dummy 26+new_dummy 26+mansyon_dumy 26+teiki_syakuya_dummy 26+walk_minute_dummy 26+r 26+rc_dummy 26+room_nums 26+square 27+k 27+lk 27+dk 27+sdk 27+sldk 27+south_direction_dummy 27+new_dummy 27+mansyon_dumy 27+teiki_syakuya_dummy 27+walk_minute_dummy 27+r 27+rc_dummy 27+room_nums 27+square 28+k 28+lk 28+dk 28+sdk 28+sldk 28+south_direction_dummy 28+new_dummy 28+mansyon_dumy 28+teiki_syakuya_dummy 28+walk_minute_dummy 28+r 28+rc_dummy 28+room_nums 28+square 30+k 30+lk 30+dk 30+sdk 30+sldk 30+south_direction_dummy 30+new_dummy 30+mansyon_dumy 30+teiki_syakuya_dummy 30+walk_minute_dummy 30+r 30+rc_dummy 30+room_nums 30'

In [107]:
y, X = dmatrices(eq, data=df_by, return_type='dataframe')


  File "<unknown>", line 1
    square 16
            ^
SyntaxError: invalid syntax

In [110]:
df_by['square 16']


Out[110]:
0       0
1       0
2       0
3       0
4       0
5       0
6       0
7       0
8       0
9       0
10      0
11      0
12      0
13      0
14      0
15      0
16      0
17      0
18      0
19      0
20      0
21      0
22      0
23      0
24      0
25      0
26      0
27      0
28      0
29      0
       ..
1459    0
1460    0
1461    0
1462    0
1463    0
1464    0
1465    0
1466    0
1467    0
1468    0
1469    0
1470    0
1471    0
1472    0
1473    0
1474    0
1475    0
1476    0
1477    0
1478    0
1479    0
1480    0
1481    0
1482    0
1483    0
1484    0
1485    0
1486    0
1487    0
1488    0
Name: square 16, dtype: float64

In [ ]: