In [21]:
import pandas as pd

In [22]:
empl_data = pd.read_csv('fixtures/training_data.csv')

In [23]:
empl_data.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 108 entries, 0 to 107
Data columns (total 34 columns):
job_growth                108 non-null float64
job_growth_min_1          108 non-null float64
job_growth_min_3          108 non-null float64
job_growth_min_6          108 non-null float64
job_growth_min_12         108 non-null float64
avg_weeks_unemp           108 non-null float64
avg_weeks_unemp_min_1     108 non-null float64
avg_weeks_unemp_min_3     108 non-null float64
avg_weeks_unemp_min_6     108 non-null float64
avg_weeks_unemp_min_12    108 non-null float64
emp_pop_ratio             108 non-null float64
lbr_frc_prtcp             108 non-null float64
not_in_lbr_frc            108 non-null float64
totl_emp_payrl            108 non-null float64
unemp_rate                108 non-null float64
fed_fund_rate             108 non-null float64
mortgage_rate             108 non-null float64
import_idx                108 non-null float64
export_idx                108 non-null float64
cpi_idx                   108 non-null float64
cpi_index_min_1           108 non-null float64
cpi_index_min_3           108 non-null float64
cpi_index_min_6           108 non-null float64
cpi_index_min_12          108 non-null float64
taxes                     108 non-null float64
taxes_min_1               108 non-null int64
taxes_min_3               108 non-null int64
taxes_min_6               108 non-null int64
taxes_min_12              108 non-null int64
spending                  108 non-null int64
spending_min_1            108 non-null int64
spending_min_3            108 non-null int64
spending_min_6            108 non-null int64
spending_min_12           108 non-null int64
dtypes: float64(25), int64(9)

In [24]:
empl_data.head()


Out[24]:
job_growth job_growth_min_1 job_growth_min_3 job_growth_min_6 job_growth_min_12 avg_weeks_unemp avg_weeks_unemp_min_1 avg_weeks_unemp_min_3 avg_weeks_unemp_min_6 avg_weeks_unemp_min_12 ... taxes taxes_min_1 taxes_min_3 taxes_min_6 taxes_min_12 spending spending_min_1 spending_min_3 spending_min_6 spending_min_12
0 0.085638 -0.075590 0.175644 0.274477 0.050552 -6.8 -6.890756 -6.790756 -7.490756 -6.190756 ... 5886.6 19419 -59434 -61915 -104321 -64514 -40315 -65111 -55050 -69916
1 0.099825 0.085638 0.357112 0.012181 -0.064240 -7.2 -6.790756 -6.590756 -7.490756 -6.490756 ... -95459.4 5887 -61783 -58601 -63905 -43965 -64514 -66908 -79764 -53288
2 0.191616 0.099825 -0.075590 -0.061617 0.163955 -6.8 -7.190756 -6.890756 -6.890756 -6.690756 ... -47571.4 -95459 19419 11021 23761 -38142 -43965 -40315 -75885 -56112
3 0.426579 0.191616 0.085638 0.175644 0.124027 -6.7 -6.790756 -6.790756 -6.790756 -6.490756 ... 81283.6 -47571 5887 -59434 -80880 -38935 -38142 -64514 -65111 -80712
4 0.251320 0.426579 0.099825 0.357112 0.231902 -7.7 -6.690756 -7.190756 -6.590756 -5.790756 ... -43599.4 81284 -95459 -61783 18052 -69705 -38935 -43965 -66908 -63367

5 rows × 34 columns


In [25]:
import statsmodels.api as sm

In [35]:
y = empl_data.job_growth

In [36]:
X = empl_data.ix[:,1:34]
X.head()


Out[36]:
job_growth_min_1 job_growth_min_3 job_growth_min_6 job_growth_min_12 avg_weeks_unemp avg_weeks_unemp_min_1 avg_weeks_unemp_min_3 avg_weeks_unemp_min_6 avg_weeks_unemp_min_12 emp_pop_ratio ... taxes taxes_min_1 taxes_min_3 taxes_min_6 taxes_min_12 spending spending_min_1 spending_min_3 spending_min_6 spending_min_12
0 -0.075590 0.175644 0.274477 0.050552 -6.8 -6.890756 -6.790756 -7.490756 -6.190756 1.7 ... 5886.6 19419 -59434 -61915 -104321 -64514 -40315 -65111 -55050 -69916
1 0.085638 0.357112 0.012181 -0.064240 -7.2 -6.790756 -6.590756 -7.490756 -6.490756 1.7 ... -95459.4 5887 -61783 -58601 -63905 -43965 -64514 -66908 -79764 -53288
2 0.099825 -0.075590 -0.061617 0.163955 -6.8 -7.190756 -6.890756 -6.890756 -6.690756 1.7 ... -47571.4 -95459 19419 11021 23761 -38142 -43965 -40315 -75885 -56112
3 0.191616 0.085638 0.175644 0.124027 -6.7 -6.790756 -6.790756 -6.790756 -6.490756 2.0 ... 81283.6 -47571 5887 -59434 -80880 -38935 -38142 -64514 -65111 -80712
4 0.426579 0.099825 0.357112 0.231902 -7.7 -6.690756 -7.190756 -6.590756 -5.790756 2.1 ... -43599.4 81284 -95459 -61783 18052 -69705 -38935 -43965 -66908 -63367

5 rows × 33 columns


In [40]:
X = sm.add_constant(X)

In [41]:
est = sm.OLS(y,X).fit()

In [42]:
est.summary()


Out[42]:
OLS Regression Results
Dep. Variable: job_growth R-squared: 0.619
Model: OLS Adj. R-squared: 0.449
Method: Least Squares F-statistic: 3.643
Date: Sat, 07 Mar 2015 Prob (F-statistic): 2.00e-06
Time: 13:11:28 Log-Likelihood: 47.212
No. Observations: 108 AIC: -26.42
Df Residuals: 74 BIC: 64.77
Df Model: 33
coef std err t P>|t| [95.0% Conf. Int.]
const 0.1105 0.096 1.147 0.255 -0.081 0.302
job_growth_min_1 -0.2519 0.104 -2.422 0.018 -0.459 -0.045
job_growth_min_3 -0.0113 0.116 -0.097 0.923 -0.243 0.220
job_growth_min_6 -0.1333 0.114 -1.168 0.247 -0.361 0.094
job_growth_min_12 -0.1083 0.117 -0.928 0.357 -0.341 0.124
avg_weeks_unemp 0.0895 0.033 2.700 0.009 0.023 0.156
avg_weeks_unemp_min_1 -0.0389 0.036 -1.085 0.281 -0.110 0.033
avg_weeks_unemp_min_3 -0.0063 0.029 -0.221 0.826 -0.063 0.051
avg_weeks_unemp_min_6 0.0673 0.028 2.388 0.019 0.011 0.123
avg_weeks_unemp_min_12 -0.0543 0.019 -2.783 0.007 -0.093 -0.015
emp_pop_ratio 0.6522 0.484 1.347 0.182 -0.313 1.617
lbr_frc_prtcp -0.3543 0.448 -0.790 0.432 -1.248 0.539
not_in_lbr_frc 0.0002 0.000 0.657 0.513 -0.000 0.001
totl_emp_payrl -0.0001 9.24e-05 -1.421 0.160 -0.000 5.28e-05
unemp_rate -0.0204 0.332 -0.061 0.951 -0.683 0.642
fed_fund_rate 0.0198 0.041 0.482 0.631 -0.062 0.102
mortgage_rate 0.0028 0.101 0.027 0.978 -0.199 0.204
import_idx 0.0057 0.016 0.366 0.715 -0.025 0.037
export_idx -0.0379 0.020 -1.895 0.062 -0.078 0.002
cpi_idx 0.0527 0.041 1.283 0.204 -0.029 0.134
cpi_index_min_1 -0.0195 0.032 -0.606 0.547 -0.084 0.045
cpi_index_min_3 0.0078 0.018 0.432 0.667 -0.028 0.044
cpi_index_min_6 -0.0036 0.014 -0.265 0.792 -0.031 0.024
cpi_index_min_12 0.0112 0.017 0.667 0.507 -0.022 0.045
taxes -4.267e-07 4.08e-07 -1.046 0.299 -1.24e-06 3.86e-07
taxes_min_1 -2.865e-07 4.19e-07 -0.684 0.496 -1.12e-06 5.48e-07
taxes_min_3 -2.41e-07 3.65e-07 -0.659 0.512 -9.69e-07 4.87e-07
taxes_min_6 -3.078e-07 3.45e-07 -0.891 0.376 -9.96e-07 3.81e-07
taxes_min_12 -1.241e-07 4.45e-07 -0.279 0.781 -1.01e-06 7.64e-07
spending -8.608e-07 7.6e-07 -1.133 0.261 -2.38e-06 6.54e-07
spending_min_1 -2.734e-07 8.06e-07 -0.339 0.735 -1.88e-06 1.33e-06
spending_min_3 -3.801e-07 7.04e-07 -0.540 0.591 -1.78e-06 1.02e-06
spending_min_6 3.091e-07 7.06e-07 0.438 0.663 -1.1e-06 1.72e-06
spending_min_12 -8.209e-08 7.41e-07 -0.111 0.912 -1.56e-06 1.39e-06
Omnibus: 0.311 Durbin-Watson: 1.874
Prob(Omnibus): 0.856 Jarque-Bera (JB): 0.091
Skew: 0.058 Prob(JB): 0.955
Kurtosis: 3.083 Cond. No. 3.54e+06

In [43]:
test_data = pd.read_csv('fixtures/test_data.csv')

In [44]:
y_test = test_data.job_growth
X_test = test_data.ix[:,1:34]

In [45]:
est.predict(X_test)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-45-f916d1250932> in <module>()
----> 1 est.predict(X_test)

/Users/itadmin/anaconda/lib/python2.7/site-packages/statsmodels/base/model.pyc in predict(self, exog, transform, *args, **kwargs)
    878             exog = dmatrix(self.model.data.orig_exog.design_info.builder,
    879                     exog)
--> 880         return self.model.predict(self.params, exog, *args, **kwargs)
    881 
    882 

/Users/itadmin/anaconda/lib/python2.7/site-packages/statsmodels/regression/linear_model.pyc in predict(self, params, exog)
    175         if exog is None:
    176             exog = self.exog
--> 177         return np.dot(exog, params)
    178 
    179 class GLS(RegressionModel):

ValueError: shapes (12,33) and (34,) not aligned: 33 (dim 1) != 34 (dim 0)

In [46]:
X_test.head()


Out[46]:
job_growth_min_1 job_growth_min_3 job_growth_min_6 job_growth_min_12 avg_weeks_unemp avg_weeks_unemp_min_1 avg_weeks_unemp_min_3 avg_weeks_unemp_min_6 avg_weeks_unemp_min_12 emp_pop_ratio ... taxes taxes_min_1 taxes_min_3 taxes_min_6 taxes_min_12 spending spending_min_1 spending_min_3 spending_min_6 spending_min_12
0 0.12527 -0.58490 0.17358 0.03350 9.00924 10.50924 9.20924 10.80924 9.20924 -1.8521 ... 99666.56303 88710.56303 2596.56303 3699.56303 75894.56303 47622.15966 -26803.84034 30886.15966 39002.15966 10715.15966
1 0.10481 0.72288 0.00901 0.07047 10.60924 9.00924 10.50924 11.10924 10.40924 -1.8521 ... -51981.43697 99666.56303 -13877.43697 -10960.43697 -73515.43697 79255.15966 47622.15966 59054.15966 74650.15966 67729.15966
2 0.16141 0.12527 0.00624 -0.03835 8.90924 10.60924 10.50924 10.90924 10.60924 -1.6521 ... 19515.56303 -51981.43697 88710.56303 105138.56303 -10312.43697 -5883.84034 79255.15966 -26803.84034 -32269.84034 33923.15966
3 0.14736 0.10481 -0.58490 0.20297 8.50924 8.90924 9.00924 9.20924 10.20924 -1.7521 ... 217906.56303 19515.56303 99666.56303 2596.56303 210392.56303 48758.15966 -5883.84034 47622.15966 30886.15966 35209.15966
4 0.22035 0.16141 0.72288 0.15661 8.00924 8.50924 10.60924 10.50924 10.60924 -1.7521 ... 3558.56303 217906.56303 -51981.43697 -13877.43697 851.56303 71235.15966 48758.15966 79255.15966 59054.15966 77289.15966

5 rows × 33 columns


In [47]:
X_test = sm.add_constant(X_test)

In [50]:
y_predicted = est.predict(X_test)

In [49]:
print(y_test)


0     0.10481
1     0.16141
2     0.14736
3     0.22035
4     0.16562
5     0.19278
6     0.17512
7     0.14604
8     0.19467
9     0.18712
10    0.25261
11    0.17988
Name: job_growth, dtype: float64

In [64]:
SSE = ((y_predicted - y_test)**2).sum()

In [65]:
SST = ((y_predicted - y_test.mean())**2).sum()

In [66]:
# Sum of Squared Errors/Residuals
print(SSE)


0.316107835165

In [67]:
# Sum of Squared Total
print(SST)


0.275184725735

In [69]:
# R-squared
R2 = 1 - (SSE/SST)
print(R2)


-0.148711413107

In [ ]: