In [50]:
from __future__ import division
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import statsmodels.discrete.discrete_model as smd
%matplotlib inline
"""import statsmodels.discrete.discrete_model.Probitでimportできないのはなぜ?"""


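Because a plain `import` statement can only name packages or modules, and Probit is a class, it has to be bound with `from ... import`. A minimal sketch:

In [ ]:
# Probit is a class inside the discrete_model module, so import it by name:
from statsmodels.discrete.discrete_model import Probit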

In [3]:
df = pd.read_table('punnoise_data.txt')  # tab-separated experiment data

In [4]:
treat = df["treat"]

In [5]:
# Split the sample by the noise condition
df_noise = df[df["noise"] == 1]
df_nonoise = df[df["noise"] == 0]

In [6]:
# ...and by punishment treatment: none, regular, strong
df_noise_nopun = df_noise[df_noise["punishment"] == 0]
df_noise_reg = df_noise[df_noise["p_reg"] == 1]
df_noise_str = df_noise[df_noise["p_strong"] == 1]
df_nonoise_nopun = df_nonoise[df_nonoise["punishment"] == 0]
df_nonoise_reg = df_nonoise[df_nonoise["p_reg"] == 1]
df_nonoise_str = df_nonoise[df_nonoise["p_strong"] == 1]

In [16]:
# Subjects per treatment cell: distinct groups times 3 members per group
n1 = df_nonoise_nopun["group_id"].nunique() * 3
n2 = df_nonoise_reg["group_id"].nunique() * 3
n3 = df_nonoise_str["group_id"].nunique() * 3
n4 = df_noise_nopun["group_id"].nunique() * 3
n5 = df_noise_reg["group_id"].nunique() * 3
n6 = df_noise_str["group_id"].nunique() * 3

print(n1, n2, n3, n4, n5, n6)


(57, 57, 54, 57, 60, 54)
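
The six counts can also be produced in one pass; a sketch, again assuming every group has exactly 3 members:

In [ ]:
# distinct groups per (noise, p_reg, p_strong) cell, times 3 members each
df.groupby(["noise", "p_reg", "p_strong"])["group_id"].nunique() * 3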

In [17]:
# Average contribution per treatment cell
# (sum over all rows / row count; same as .mean() when nothing is missing)
Avecon1 = df_nonoise_nopun["contribution"].sum() / df_nonoise_nopun["noise"].size
Avecon2 = df_nonoise_reg["contribution"].sum() / df_nonoise_reg["noise"].size
Avecon3 = df_nonoise_str["contribution"].sum() / df_nonoise_str["noise"].size
Avecon4 = df_noise_nopun["contribution"].sum() / df_noise_nopun["noise"].size
Avecon5 = df_noise_reg["contribution"].sum() / df_noise_reg["noise"].size
Avecon6 = df_noise_str["contribution"].sum() / df_noise_str["noise"].size

print(Avecon1, Avecon2, Avecon3, Avecon4, Avecon5, Avecon6)


(5.5929824561403505, 12.4, 17.607407407407408, 4.0350877192982457, 9.5999999999999996, 16.037037037037038)
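
A single groupby likewise reproduces these cell means, together with the income means computed in In [19] below (a sketch):

In [ ]:
df.groupby(["noise", "p_reg", "p_strong"])[["contribution", "income"]].mean()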

In [18]:
# Average punishment cost per treatment cell (punishment treatments only)
Avepun2 = df_nonoise_reg["punishment_cost"].sum() / df_nonoise_reg["noise"].size
Avepun3 = df_nonoise_str["punishment_cost"].sum() / df_nonoise_str["noise"].size
Avepun5 = df_noise_reg["punishment_cost"].sum() / df_noise_reg["noise"].size
Avepun6 = df_noise_str["punishment_cost"].sum() / df_noise_str["noise"].size

print(Avepun2, Avepun3, Avepun5, Avepun6)


(0.64000000000000001, 0.47925925925925927, 1.446, 0.64814814814814814)

In [19]:
# Average income (profit) per treatment cell
Avepro1 = df_nonoise_nopun["income"].sum() / df_nonoise_nopun["noise"].size
Avepro2 = df_nonoise_reg["income"].sum() / df_nonoise_reg["noise"].size
Avepro3 = df_nonoise_str["income"].sum() / df_nonoise_str["noise"].size
Avepro4 = df_noise_nopun["income"].sum() / df_noise_nopun["noise"].size
Avepro5 = df_noise_reg["income"].sum() / df_noise_reg["noise"].size
Avepro6 = df_noise_str["income"].sum() / df_noise_str["noise"].size
print(Avepro1, Avepro2, Avepro3, Avepro4, Avepro5, Avepro6)


(22.796491228070174, 23.656140350877195, 25.448888888888888, 22.017543859649123, 19.103666666666665, 23.481481481481481)

In [43]:
# Per-round contribution series for each treatment cell
grouped1 = df_nonoise_nopun.groupby("round")[["contribution"]]
grouped2 = df_nonoise_reg.groupby("round")[["contribution"]]
grouped3 = df_nonoise_str.groupby("round")[["contribution"]]
grouped4 = df_noise_nopun.groupby("round")[["contribution"]]
grouped5 = df_noise_reg.groupby("round")[["contribution"]]
grouped6 = df_noise_str.groupby("round")[["contribution"]]

In [92]:
groupedmean1 = grouped1.mean()
groupedmean2 = grouped2.mean()
groupedmean3 = grouped3.mean()
groupedmean4 = grouped4.mean()
groupedmean5 = grouped5.mean()
groupedmean6 = grouped6.mean()

In [93]:
# Contributions over rounds: left panel = no noise, right panel = noise
# (black = no punishment, green = regular punishment, default colour = strong)
fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2, sharey=ax1)
plt.subplots_adjust(wspace=0.2, hspace=0.11)
groupedmean1.plot(ax=ax1, color="k")
groupedmean2.plot(ax=ax1, color="g")
groupedmean3.plot(ax=ax1)
groupedmean4.plot(ax=ax2, color="k")
groupedmean5.plot(ax=ax2, color="g")
groupedmean6.plot(ax=ax2)

ax1.legend(loc='lower center', bbox_to_anchor=(0.5, -0.5))
ax2.legend(loc='lower center', bbox_to_anchor=(0.5, -0.5))


Out[93]:
<matplotlib.legend.Legend at 0x25848240>
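
Every legend entry above reads "contribution" (the column name). One way to get informative labels is to rename the column before plotting; a sketch for the left panel:

In [ ]:
groupedmean1.rename(columns={"contribution": "no punishment"}).plot(ax=ax1, color="k")
groupedmean2.rename(columns={"contribution": "regular punishment"}).plot(ax=ax1, color="g")
groupedmean3.rename(columns={"contribution": "strong punishment"}).plot(ax=ax1)
ax1.legend(loc='lower center', bbox_to_anchor=(0.5, -0.5))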

In [95]:
# Same round-by-round aggregation, now for income
grouped_avepro1 = df_nonoise_nopun.groupby("round")[["income"]]
grouped_avepro2 = df_nonoise_reg.groupby("round")[["income"]]
grouped_avepro3 = df_nonoise_str.groupby("round")[["income"]]
grouped_avepro4 = df_noise_nopun.groupby("round")[["income"]]
grouped_avepro5 = df_noise_reg.groupby("round")[["income"]]
grouped_avepro6 = df_noise_str.groupby("round")[["income"]]
grouped_pro_mean1 = grouped_avepro1.mean()
grouped_pro_mean2 = grouped_avepro2.mean()
grouped_pro_mean3 = grouped_avepro3.mean()
grouped_pro_mean4 = grouped_avepro4.mean()
grouped_pro_mean5 = grouped_avepro5.mean()
grouped_pro_mean6 = grouped_avepro6.mean()

In [96]:
# Income over rounds, same panel layout as the contribution plot above
fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2, sharey=ax1)
plt.subplots_adjust(wspace=0.2, hspace=0.11)
grouped_pro_mean1.plot(ax=ax1, color="k")
grouped_pro_mean2.plot(ax=ax1, color="g")
grouped_pro_mean3.plot(ax=ax1)
grouped_pro_mean4.plot(ax=ax2, color="k")
grouped_pro_mean5.plot(ax=ax2, color="g")
grouped_pro_mean6.plot(ax=ax2)

ax1.legend(loc='lower center', bbox_to_anchor=(0.5, -0.5))
ax2.legend(loc='lower center', bbox_to_anchor=(0.5, -0.5))


Out[96]:
<matplotlib.legend.Legend at 0x25e61ba8>

In [10]:
DD = df[["income","round","p_reg","p_strong",
         "noise","noise__p_reg","noise__p_strong"]]

In [12]:
"""http://necochan.com/2014/06/07/python-for-economist-6/から引用"""


Out[12]:
'fml_build below is adapted from http://necochan.com/2014/06/07/python-for-economist-6/'

In [36]:
varlst=["income","round","p_reg","p_strong",
         "noise","noise__p_reg","noise__p_strong"]

In [ ]:
pd.scatter_matrix(DD[varlst], diagonal='kde')
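
`pd.scatter_matrix` is the spelling of this pandas era; from pandas 0.20 on the function lives in `pandas.plotting` (a sketch):

In [ ]:
from pandas.plotting import scatter_matrix
scatter_matrix(DD[varlst], diagonal='kde')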

In [37]:
def fml_build(varlst):
    # Build a patsy formula: the first variable is the response,
    # the remaining ones are the regressors.
    return varlst[0] + '~' + '+'.join(varlst[1:])

eq = fml_build(varlst)

In [38]:
eq


Out[38]:
'income~round+p_reg+p_strong+noise+noise__p_reg+noise__p_strong'
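
Patsy can also build the interactions itself, avoiding the precomputed noise__p_reg / noise__p_strong columns (a sketch, assuming those columns are plain products of their factors):

In [ ]:
eq2 = 'income ~ round + p_reg + p_strong + noise + noise:p_reg + noise:p_strong'
res2 = smf.ols(formula=eq2, data=df).fit()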

In [41]:
res = smf.ols(formula=eq, data=df).fit()
print(res.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 income   R-squared:                       0.053
Model:                            OLS   Adj. R-squared:                  0.053
Method:                 Least Squares   F-statistic:                     158.7
Date:                Tue, 06 Oct 2015   Prob (F-statistic):          6.51e-197
Time:                        14:15:13   Log-Likelihood:                -60859.
No. Observations:               16950   AIC:                         1.217e+05
Df Residuals:                   16943   BIC:                         1.218e+05
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
===================================================================================
                      coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------
Intercept          21.5449      0.203    106.155      0.000        21.147    21.943
round               0.0491      0.005     10.510      0.000         0.040     0.058
p_reg               0.8596      0.232      3.699      0.000         0.404     1.315
p_strong            2.6524      0.236     11.257      0.000         2.191     3.114
noise              -0.7789      0.232     -3.351      0.001        -1.235    -0.323
noise__p_reg       -3.7735      0.327    -11.553      0.000        -4.414    -3.133
noise__p_strong    -1.1885      0.333     -3.566      0.000        -1.842    -0.535
==============================================================================
Omnibus:                     5641.993   Durbin-Watson:                   1.025
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            29816.600
Skew:                          -1.514   Prob(JB):                         0.00
Kurtosis:                       8.749   Cond. No.                         224.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
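
With repeated observations per subject and group (the Durbin-Watson statistic of 1.03 also points to serial dependence), group-clustered standard errors would be more defensible than the default ones. A sketch, assuming `group_id` marks the independent clusters:

In [ ]:
res_cl = smf.ols(formula=eq, data=df).fit(cov_type='cluster',
                                          cov_kwds={'groups': df['group_id']})
print(res_cl.summary())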

In [42]:
"""疑問点、R-squaredとadj.~が同じなのはなぜ?
interceptとは
p値がどれも低いがそれはデータと違う
95%conf.とint.とは
"""



In [57]:
pro_res = smd.Probit(endog=["income","round","p_reg","p_strong","noise","noise__p_reg","noise__p_strong"],exog=DD)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-57-7ebebeed37eb> in <module>()
----> 1 pro_res = smd.Probit(endog=["income","round","p_reg","p_strong","noise","noise__p_reg","noise__p_strong"],exog=DD)

(... statsmodels internals elided: the size check in statsmodels/base/data.py
raises because endog is a 7-element list of column names while exog has one
row per observation ...)
ValueError: endog and exog matrices are different sizes
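
The call itself is at fault: `endog` must be the dependent variable (one value per observation, aligned with `exog`), not a list of column names, and for a Probit it must be binary, which `income` is not. A minimal sketch of a correctly specified probit via the formula API, using the binary `noise` indicator purely as a placeholder outcome:

In [ ]:
# Probit requires a 0/1 endog; "noise" serves only as an illustration here.
pro_res = smf.probit('noise ~ round + p_reg + p_strong', data=df).fit()
print(pro_res.summary())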

In [ ]: