notebook.community

Edit and run



In [21]:

    
import pandas as pd



In [22]:

    
empl_data = pd.read_csv('fixtures/training_data.csv')



In [23]:

    
empl_data.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 108 entries, 0 to 107
Data columns (total 34 columns):
job_growth                108 non-null float64
job_growth_min_1          108 non-null float64
job_growth_min_3          108 non-null float64
job_growth_min_6          108 non-null float64
job_growth_min_12         108 non-null float64
avg_weeks_unemp           108 non-null float64
avg_weeks_unemp_min_1     108 non-null float64
avg_weeks_unemp_min_3     108 non-null float64
avg_weeks_unemp_min_6     108 non-null float64
avg_weeks_unemp_min_12    108 non-null float64
emp_pop_ratio             108 non-null float64
lbr_frc_prtcp             108 non-null float64
not_in_lbr_frc            108 non-null float64
totl_emp_payrl            108 non-null float64
unemp_rate                108 non-null float64
fed_fund_rate             108 non-null float64
mortgage_rate             108 non-null float64
import_idx                108 non-null float64
export_idx                108 non-null float64
cpi_idx                   108 non-null float64
cpi_index_min_1           108 non-null float64
cpi_index_min_3           108 non-null float64
cpi_index_min_6           108 non-null float64
cpi_index_min_12          108 non-null float64
taxes                     108 non-null float64
taxes_min_1               108 non-null int64
taxes_min_3               108 non-null int64
taxes_min_6               108 non-null int64
taxes_min_12              108 non-null int64
spending                  108 non-null int64
spending_min_1            108 non-null int64
spending_min_3            108 non-null int64
spending_min_6            108 non-null int64
spending_min_12           108 non-null int64
dtypes: float64(25), int64(9)



In [24]:

    
empl_data.head()









    Out[24]:






  
    
      
      job_growth
      job_growth_min_1
      job_growth_min_3
      job_growth_min_6
      job_growth_min_12
      avg_weeks_unemp
      avg_weeks_unemp_min_1
      avg_weeks_unemp_min_3
      avg_weeks_unemp_min_6
      avg_weeks_unemp_min_12
      ...
      taxes
      taxes_min_1
      taxes_min_3
      taxes_min_6
      taxes_min_12
      spending
      spending_min_1
      spending_min_3
      spending_min_6
      spending_min_12
    
  
  
    
      0
       0.085638
      -0.075590
       0.175644
       0.274477
       0.050552
      -6.8
      -6.890756
      -6.790756
      -7.490756
      -6.190756
      ...
        5886.6
       19419
      -59434
      -61915
      -104321
      -64514
      -40315
      -65111
      -55050
      -69916
    
    
      1
       0.099825
       0.085638
       0.357112
       0.012181
      -0.064240
      -7.2
      -6.790756
      -6.590756
      -7.490756
      -6.490756
      ...
      -95459.4
        5887
      -61783
      -58601
       -63905
      -43965
      -64514
      -66908
      -79764
      -53288
    
    
      2
       0.191616
       0.099825
      -0.075590
      -0.061617
       0.163955
      -6.8
      -7.190756
      -6.890756
      -6.890756
      -6.690756
      ...
      -47571.4
      -95459
       19419
       11021
        23761
      -38142
      -43965
      -40315
      -75885
      -56112
    
    
      3
       0.426579
       0.191616
       0.085638
       0.175644
       0.124027
      -6.7
      -6.790756
      -6.790756
      -6.790756
      -6.490756
      ...
       81283.6
      -47571
        5887
      -59434
       -80880
      -38935
      -38142
      -64514
      -65111
      -80712
    
    
      4
       0.251320
       0.426579
       0.099825
       0.357112
       0.231902
      -7.7
      -6.690756
      -7.190756
      -6.590756
      -5.790756
      ...
      -43599.4
       81284
      -95459
      -61783
        18052
      -69705
      -38935
      -43965
      -66908
      -63367
    
  

5 rows × 34 columns



In [25]:

    
import statsmodels.api as sm



In [35]:

    
y = empl_data.job_growth



In [36]:

    
X = empl_data.ix[:,1:34]
X.head()









    Out[36]:






  
    
      
      job_growth_min_1
      job_growth_min_3
      job_growth_min_6
      job_growth_min_12
      avg_weeks_unemp
      avg_weeks_unemp_min_1
      avg_weeks_unemp_min_3
      avg_weeks_unemp_min_6
      avg_weeks_unemp_min_12
      emp_pop_ratio
      ...
      taxes
      taxes_min_1
      taxes_min_3
      taxes_min_6
      taxes_min_12
      spending
      spending_min_1
      spending_min_3
      spending_min_6
      spending_min_12
    
  
  
    
      0
      -0.075590
       0.175644
       0.274477
       0.050552
      -6.8
      -6.890756
      -6.790756
      -7.490756
      -6.190756
       1.7
      ...
        5886.6
       19419
      -59434
      -61915
      -104321
      -64514
      -40315
      -65111
      -55050
      -69916
    
    
      1
       0.085638
       0.357112
       0.012181
      -0.064240
      -7.2
      -6.790756
      -6.590756
      -7.490756
      -6.490756
       1.7
      ...
      -95459.4
        5887
      -61783
      -58601
       -63905
      -43965
      -64514
      -66908
      -79764
      -53288
    
    
      2
       0.099825
      -0.075590
      -0.061617
       0.163955
      -6.8
      -7.190756
      -6.890756
      -6.890756
      -6.690756
       1.7
      ...
      -47571.4
      -95459
       19419
       11021
        23761
      -38142
      -43965
      -40315
      -75885
      -56112
    
    
      3
       0.191616
       0.085638
       0.175644
       0.124027
      -6.7
      -6.790756
      -6.790756
      -6.790756
      -6.490756
       2.0
      ...
       81283.6
      -47571
        5887
      -59434
       -80880
      -38935
      -38142
      -64514
      -65111
      -80712
    
    
      4
       0.426579
       0.099825
       0.357112
       0.231902
      -7.7
      -6.690756
      -7.190756
      -6.590756
      -5.790756
       2.1
      ...
      -43599.4
       81284
      -95459
      -61783
        18052
      -69705
      -38935
      -43965
      -66908
      -63367
    
  

5 rows × 33 columns



In [40]:

    
X = sm.add_constant(X)



In [41]:

    
est = sm.OLS(y,X).fit()



In [42]:

    
est.summary()









    Out[42]:





OLS Regression Results

  Dep. Variable:        job_growth       R-squared:             0.619


  Model:                    OLS          Adj. R-squared:        0.449


  Method:              Least Squares     F-statistic:           3.643


  Date:              Sat, 07 Mar 2015    Prob (F-statistic):  2.00e-06


  Time:                  13:11:28        Log-Likelihood:       47.212


  No. Observations:          108         AIC:                  -26.42


  Df Residuals:               74         BIC:                   64.77


  Df Model:                   33                                     




                            coef      std err       t       P>|t|  [95.0% Conf. Int.] 


  const                       0.1105      0.096      1.147   0.255     -0.081     0.302


  job_growth_min_1           -0.2519      0.104     -2.422   0.018     -0.459    -0.045


  job_growth_min_3           -0.0113      0.116     -0.097   0.923     -0.243     0.220


  job_growth_min_6           -0.1333      0.114     -1.168   0.247     -0.361     0.094


  job_growth_min_12          -0.1083      0.117     -0.928   0.357     -0.341     0.124


  avg_weeks_unemp             0.0895      0.033      2.700   0.009      0.023     0.156


  avg_weeks_unemp_min_1      -0.0389      0.036     -1.085   0.281     -0.110     0.033


  avg_weeks_unemp_min_3      -0.0063      0.029     -0.221   0.826     -0.063     0.051


  avg_weeks_unemp_min_6       0.0673      0.028      2.388   0.019      0.011     0.123


  avg_weeks_unemp_min_12     -0.0543      0.019     -2.783   0.007     -0.093    -0.015


  emp_pop_ratio               0.6522      0.484      1.347   0.182     -0.313     1.617


  lbr_frc_prtcp              -0.3543      0.448     -0.790   0.432     -1.248     0.539


  not_in_lbr_frc              0.0002      0.000      0.657   0.513     -0.000     0.001


  totl_emp_payrl             -0.0001   9.24e-05     -1.421   0.160     -0.000  5.28e-05


  unemp_rate                 -0.0204      0.332     -0.061   0.951     -0.683     0.642


  fed_fund_rate               0.0198      0.041      0.482   0.631     -0.062     0.102


  mortgage_rate               0.0028      0.101      0.027   0.978     -0.199     0.204


  import_idx                  0.0057      0.016      0.366   0.715     -0.025     0.037


  export_idx                 -0.0379      0.020     -1.895   0.062     -0.078     0.002


  cpi_idx                     0.0527      0.041      1.283   0.204     -0.029     0.134


  cpi_index_min_1            -0.0195      0.032     -0.606   0.547     -0.084     0.045


  cpi_index_min_3             0.0078      0.018      0.432   0.667     -0.028     0.044


  cpi_index_min_6            -0.0036      0.014     -0.265   0.792     -0.031     0.024


  cpi_index_min_12            0.0112      0.017      0.667   0.507     -0.022     0.045


  taxes                   -4.267e-07   4.08e-07     -1.046   0.299  -1.24e-06  3.86e-07


  taxes_min_1             -2.865e-07   4.19e-07     -0.684   0.496  -1.12e-06  5.48e-07


  taxes_min_3              -2.41e-07   3.65e-07     -0.659   0.512  -9.69e-07  4.87e-07


  taxes_min_6             -3.078e-07   3.45e-07     -0.891   0.376  -9.96e-07  3.81e-07


  taxes_min_12            -1.241e-07   4.45e-07     -0.279   0.781  -1.01e-06  7.64e-07


  spending                -8.608e-07    7.6e-07     -1.133   0.261  -2.38e-06  6.54e-07


  spending_min_1          -2.734e-07   8.06e-07     -0.339   0.735  -1.88e-06  1.33e-06


  spending_min_3          -3.801e-07   7.04e-07     -0.540   0.591  -1.78e-06  1.02e-06


  spending_min_6           3.091e-07   7.06e-07      0.438   0.663   -1.1e-06  1.72e-06


  spending_min_12         -8.209e-08   7.41e-07     -0.111   0.912  -1.56e-06  1.39e-06




  Omnibus:         0.311    Durbin-Watson:         1.874


  Prob(Omnibus):   0.856    Jarque-Bera (JB):      0.091


  Skew:            0.058    Prob(JB):              0.955


  Kurtosis:        3.083    Cond. No.           3.54e+06



In [43]:

    
test_data = pd.read_csv('fixtures/test_data.csv')



In [44]:

    
y_test = test_data.job_growth
X_test = test_data.ix[:,1:34]



In [45]:

    
est.predict(X_test)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-45-f916d1250932> in <module>()
----> 1 est.predict(X_test)

/Users/itadmin/anaconda/lib/python2.7/site-packages/statsmodels/base/model.pyc in predict(self, exog, transform, *args, **kwargs)
    878             exog = dmatrix(self.model.data.orig_exog.design_info.builder,
    879                     exog)
--> 880         return self.model.predict(self.params, exog, *args, **kwargs)
    881 
    882 

/Users/itadmin/anaconda/lib/python2.7/site-packages/statsmodels/regression/linear_model.pyc in predict(self, params, exog)
    175         if exog is None:
    176             exog = self.exog
--> 177         return np.dot(exog, params)
    178 
    179 class GLS(RegressionModel):

ValueError: shapes (12,33) and (34,) not aligned: 33 (dim 1) != 34 (dim 0)



In [46]:

    
X_test.head()









    Out[46]:






  
    
      
      job_growth_min_1
      job_growth_min_3
      job_growth_min_6
      job_growth_min_12
      avg_weeks_unemp
      avg_weeks_unemp_min_1
      avg_weeks_unemp_min_3
      avg_weeks_unemp_min_6
      avg_weeks_unemp_min_12
      emp_pop_ratio
      ...
      taxes
      taxes_min_1
      taxes_min_3
      taxes_min_6
      taxes_min_12
      spending
      spending_min_1
      spending_min_3
      spending_min_6
      spending_min_12
    
  
  
    
      0
       0.12527
      -0.58490
       0.17358
       0.03350
        9.00924
       10.50924
        9.20924
       10.80924
        9.20924
      -1.8521
      ...
        99666.56303
        88710.56303
        2596.56303
         3699.56303
        75894.56303
       47622.15966
      -26803.84034
       30886.15966
       39002.15966
       10715.15966
    
    
      1
       0.10481
       0.72288
       0.00901
       0.07047
       10.60924
        9.00924
       10.50924
       11.10924
       10.40924
      -1.8521
      ...
       -51981.43697
        99666.56303
      -13877.43697
       -10960.43697
       -73515.43697
       79255.15966
       47622.15966
       59054.15966
       74650.15966
       67729.15966
    
    
      2
       0.16141
       0.12527
       0.00624
      -0.03835
        8.90924
       10.60924
       10.50924
       10.90924
       10.60924
      -1.6521
      ...
        19515.56303
       -51981.43697
       88710.56303
       105138.56303
       -10312.43697
       -5883.84034
       79255.15966
      -26803.84034
      -32269.84034
       33923.15966
    
    
      3
       0.14736
       0.10481
      -0.58490
       0.20297
        8.50924
        8.90924
        9.00924
        9.20924
       10.20924
      -1.7521
      ...
       217906.56303
        19515.56303
       99666.56303
         2596.56303
       210392.56303
       48758.15966
       -5883.84034
       47622.15966
       30886.15966
       35209.15966
    
    
      4
       0.22035
       0.16141
       0.72288
       0.15661
        8.00924
        8.50924
       10.60924
       10.50924
       10.60924
      -1.7521
      ...
         3558.56303
       217906.56303
      -51981.43697
       -13877.43697
          851.56303
       71235.15966
       48758.15966
       79255.15966
       59054.15966
       77289.15966
    
  

5 rows × 33 columns



In [47]:

    
X_test = sm.add_constant(X_test)



In [50]:

    
y_predicted = est.predict(X_test)



In [49]:

    
print(y_test)









    



0     0.10481
1     0.16141
2     0.14736
3     0.22035
4     0.16562
5     0.19278
6     0.17512
7     0.14604
8     0.19467
9     0.18712
10    0.25261
11    0.17988
Name: job_growth, dtype: float64



In [64]:

    
SSE = ((y_predicted - y_test)**2).sum()



In [65]:

    
SST = ((y_predicted - y_test.mean())**2).sum()



In [66]:

    
# Sum of Squared Errors/Residuals
print(SSE)









    



0.316107835165



In [67]:

    
# Sum of Squared Total
print(SST)









    



0.275184725735



In [69]:

    
# R-squared
R2 = 1 - (SSE/SST)
print(R2)









    



-0.148711413107



In [ ]:

	job_growth	job_growth_min_1	job_growth_min_3	job_growth_min_6	job_growth_min_12	avg_weeks_unemp	avg_weeks_unemp_min_1	avg_weeks_unemp_min_3	avg_weeks_unemp_min_6	avg_weeks_unemp_min_12	...	taxes	taxes_min_1	taxes_min_3	taxes_min_6	taxes_min_12	spending	spending_min_1	spending_min_3	spending_min_6	spending_min_12
0	0.085638	-0.075590	0.175644	0.274477	0.050552	-6.8	-6.890756	-6.790756	-7.490756	-6.190756	...	5886.6	19419	-59434	-61915	-104321	-64514	-40315	-65111	-55050	-69916
1	0.099825	0.085638	0.357112	0.012181	-0.064240	-7.2	-6.790756	-6.590756	-7.490756	-6.490756	...	-95459.4	5887	-61783	-58601	-63905	-43965	-64514	-66908	-79764	-53288
2	0.191616	0.099825	-0.075590	-0.061617	0.163955	-6.8	-7.190756	-6.890756	-6.890756	-6.690756	...	-47571.4	-95459	19419	11021	23761	-38142	-43965	-40315	-75885	-56112
3	0.426579	0.191616	0.085638	0.175644	0.124027	-6.7	-6.790756	-6.790756	-6.790756	-6.490756	...	81283.6	-47571	5887	-59434	-80880	-38935	-38142	-64514	-65111	-80712
4	0.251320	0.426579	0.099825	0.357112	0.231902	-7.7	-6.690756	-7.190756	-6.590756	-5.790756	...	-43599.4	81284	-95459	-61783	18052	-69705	-38935	-43965	-66908	-63367

Dep. Variable:	job_growth	R-squared:	0.619
Model:	OLS	Adj. R-squared:	0.449
Method:	Least Squares	F-statistic:	3.643
Date:	Sat, 07 Mar 2015	Prob (F-statistic):	2.00e-06
Time:	13:11:28	Log-Likelihood:	47.212
No. Observations:	108	AIC:	-26.42
Df Residuals:	74	BIC:	64.77
Df Model:	33

	coef	std err	t	P>\|t\|	[95.0% Conf. Int.]
const	0.1105	0.096	1.147	0.255	-0.081 0.302
job_growth_min_1	-0.2519	0.104	-2.422	0.018	-0.459 -0.045
job_growth_min_3	-0.0113	0.116	-0.097	0.923	-0.243 0.220
job_growth_min_6	-0.1333	0.114	-1.168	0.247	-0.361 0.094
job_growth_min_12	-0.1083	0.117	-0.928	0.357	-0.341 0.124
avg_weeks_unemp	0.0895	0.033	2.700	0.009	0.023 0.156
avg_weeks_unemp_min_1	-0.0389	0.036	-1.085	0.281	-0.110 0.033
avg_weeks_unemp_min_3	-0.0063	0.029	-0.221	0.826	-0.063 0.051
avg_weeks_unemp_min_6	0.0673	0.028	2.388	0.019	0.011 0.123
avg_weeks_unemp_min_12	-0.0543	0.019	-2.783	0.007	-0.093 -0.015
emp_pop_ratio	0.6522	0.484	1.347	0.182	-0.313 1.617
lbr_frc_prtcp	-0.3543	0.448	-0.790	0.432	-1.248 0.539
not_in_lbr_frc	0.0002	0.000	0.657	0.513	-0.000 0.001
totl_emp_payrl	-0.0001	9.24e-05	-1.421	0.160	-0.000 5.28e-05
unemp_rate	-0.0204	0.332	-0.061	0.951	-0.683 0.642
fed_fund_rate	0.0198	0.041	0.482	0.631	-0.062 0.102
mortgage_rate	0.0028	0.101	0.027	0.978	-0.199 0.204
import_idx	0.0057	0.016	0.366	0.715	-0.025 0.037
export_idx	-0.0379	0.020	-1.895	0.062	-0.078 0.002
cpi_idx	0.0527	0.041	1.283	0.204	-0.029 0.134
cpi_index_min_1	-0.0195	0.032	-0.606	0.547	-0.084 0.045
cpi_index_min_3	0.0078	0.018	0.432	0.667	-0.028 0.044
cpi_index_min_6	-0.0036	0.014	-0.265	0.792	-0.031 0.024
cpi_index_min_12	0.0112	0.017	0.667	0.507	-0.022 0.045
taxes	-4.267e-07	4.08e-07	-1.046	0.299	-1.24e-06 3.86e-07
taxes_min_1	-2.865e-07	4.19e-07	-0.684	0.496	-1.12e-06 5.48e-07
taxes_min_3	-2.41e-07	3.65e-07	-0.659	0.512	-9.69e-07 4.87e-07
taxes_min_6	-3.078e-07	3.45e-07	-0.891	0.376	-9.96e-07 3.81e-07
taxes_min_12	-1.241e-07	4.45e-07	-0.279	0.781	-1.01e-06 7.64e-07
spending	-8.608e-07	7.6e-07	-1.133	0.261	-2.38e-06 6.54e-07
spending_min_1	-2.734e-07	8.06e-07	-0.339	0.735	-1.88e-06 1.33e-06
spending_min_3	-3.801e-07	7.04e-07	-0.540	0.591	-1.78e-06 1.02e-06
spending_min_6	3.091e-07	7.06e-07	0.438	0.663	-1.1e-06 1.72e-06
spending_min_12	-8.209e-08	7.41e-07	-0.111	0.912	-1.56e-06 1.39e-06

Omnibus:	0.311	Durbin-Watson:	1.874
Prob(Omnibus):	0.856	Jarque-Bera (JB):	0.091
Skew:	0.058	Prob(JB):	0.955
Kurtosis:	3.083	Cond. No.	3.54e+06

	job_growth_min_1	job_growth_min_3	job_growth_min_6	job_growth_min_12	avg_weeks_unemp	avg_weeks_unemp_min_1	avg_weeks_unemp_min_3	avg_weeks_unemp_min_6	avg_weeks_unemp_min_12	emp_pop_ratio	...	taxes	taxes_min_1	taxes_min_3	taxes_min_6	taxes_min_12	spending	spending_min_1	spending_min_3	spending_min_6	spending_min_12
0	0.12527	-0.58490	0.17358	0.03350	9.00924	10.50924	9.20924	10.80924	9.20924	-1.8521	...	99666.56303	88710.56303	2596.56303	3699.56303	75894.56303	47622.15966	-26803.84034	30886.15966	39002.15966	10715.15966
1	0.10481	0.72288	0.00901	0.07047	10.60924	9.00924	10.50924	11.10924	10.40924	-1.8521	...	-51981.43697	99666.56303	-13877.43697	-10960.43697	-73515.43697	79255.15966	47622.15966	59054.15966	74650.15966	67729.15966
2	0.16141	0.12527	0.00624	-0.03835	8.90924	10.60924	10.50924	10.90924	10.60924	-1.6521	...	19515.56303	-51981.43697	88710.56303	105138.56303	-10312.43697	-5883.84034	79255.15966	-26803.84034	-32269.84034	33923.15966
3	0.14736	0.10481	-0.58490	0.20297	8.50924	8.90924	9.00924	9.20924	10.20924	-1.7521	...	217906.56303	19515.56303	99666.56303	2596.56303	210392.56303	48758.15966	-5883.84034	47622.15966	30886.15966	35209.15966
4	0.22035	0.16141	0.72288	0.15661	8.00924	8.50924	10.60924	10.50924	10.60924	-1.7521	...	3558.56303	217906.56303	-51981.43697	-13877.43697	851.56303	71235.15966	48758.15966	79255.15966	59054.15966	77289.15966