notebook.community

Edit and run



In [2]:

    
import pandas as pd
from matplotlib import pyplot as plt

%matplotlib inline



In [13]:

    
# Read the training CSV and the test CSV from the local fixtures directory
# into two DataFrames (training first, test second).
empl_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('fixtures/training_data.csv', 'fixtures/test_data.csv')
)



In [15]:

    
# Report the (rows, columns) shape of the training and test frames.
# The parenthesised single-argument form prints the same text under
# Python 2 and is also valid syntax under Python 3, where the bare
# `print x` statement is a SyntaxError.
print(empl_data.shape)
print(test_data.shape)









    



(108, 34)
(12, 34)



In [5]:

    
# Preview the first rows of the training frame (rich DataFrame display).
# NOTE(review): the recorded output below reports 48 columns, while the
# shape cell above reports 34 columns for the same frame; the execution
# counts are also out of order (In [5] here vs In [13]/In [15] above).
# The saved outputs look like they come from different runs - re-run the
# notebook top to bottom to confirm.
empl_data.head()









    Out[5]:






  
    
      
      job_growth
      job_growth_per
      job_growth_min_1
      job_growth_min_3
      job_growth_min_6
      job_growth_min_12
      bls_avg_weeks_unemp_per
      avg_weeks_unemp
      avg_weeks_unemp_min_1
      avg_weeks_unemp_min_3
      ...
      taxes_min_1
      taxes_min_3
      taxes_min_6
      taxes_min_12
      outlays
      spending
      spending_min_1
      spending_min_3
      spending_min_6
      spending_min_12
    
  
  
    
      0
       0.085638
       0.000856
      -0.075590
       0.175644
       0.274477
       0.050552
       19.5
      -6.8
      -6.890756
      -6.790756
      ...
       19419
      -59434
      -61915
      -104321
       194111
      -64514
      -40315
      -65111
      -55050
      -69916
    
    
      1
       0.099825
       0.000998
       0.085638
       0.357112
       0.012181
      -0.064240
       19.1
      -7.2
      -6.790756
      -6.590756
      ...
        5887
      -61783
      -58601
       -63905
       214660
      -43965
      -64514
      -66908
      -79764
      -53288
    
    
      2
       0.191616
       0.001916
       0.099825
      -0.075590
      -0.061617
       0.163955
       19.5
      -6.8
      -7.190756
      -6.890756
      ...
      -95459
       19419
       11021
        23761
       220483
      -38142
      -43965
      -40315
      -75885
      -56112
    
    
      3
       0.426579
       0.004266
       0.191616
       0.085638
       0.175644
       0.124027
       19.6
      -6.7
      -6.790756
      -6.790756
      ...
      -47571
        5887
      -59434
       -80880
       219690
      -38935
      -38142
      -64514
      -65111
      -80712
    
    
      4
       0.251320
       0.002513
       0.426579
       0.099825
       0.357112
       0.231902
       18.6
      -7.7
      -6.690756
      -7.190756
      ...
       81284
      -95459
      -61783
        18052
       188920
      -69705
      -38935
      -43965
      -66908
      -63367
    
  

5 rows × 48 columns



In [6]:

    
# Scatter job_growth against three CPI columns, one panel per column, with a
# shared y-axis so the panels are directly comparable.
# - The figure size is set once on the figure instead of being repeated in
#   every plot call.
# - The cell ends with a loop rather than a bare expression, so no
#   AxesSubplot repr is emitted as cell output.
cpi_cols = ['cpi_idx', 'cpi_index_min_3', 'cpi_index_min_12']
fig, axs = plt.subplots(1, len(cpi_cols), sharey=True, figsize=(16, 8))
for ax, cpi_col in zip(axs, cpi_cols):
    empl_data.plot(kind='scatter', x=cpi_col, y='job_growth', ax=ax)









    Out[6]:





<matplotlib.axes._subplots.AxesSubplot at 0x10bb63950>



In [16]:

    
# Standard scikit-learn workflow: import, instantiate, fit.
# This cell covers the first two steps; the fit is done in a later cell.
from sklearn import linear_model

# Regularisation strength handed to the Lasso estimator.
lasso_alpha = 0.3
regr = linear_model.Lasso(alpha=lasso_alpha)
# NOTE(review): the model is later fitted on raw, unscaled columns whose
# magnitudes differ by several orders (compare the job_growth_* and taxes_*
# values in the head() output). Lasso's penalty is sensitive to feature
# scale - consider standardising the features before fitting.



In [18]:

    
# Inputs for the scikit-learn model.
# feature_cols lists the predictor columns; its order is the column order of
# X. The group comments below only echo the column-name prefixes.
feature_cols = [
    # job_growth_min_*
    'job_growth_min_1', 'job_growth_min_3', 'job_growth_min_6', 'job_growth_min_12',
    # employment / labour-force columns
    'avg_weeks_unemp', 'emp_pop_ratio', 'lbr_frc_prtcp', 'not_in_lbr_frc',
    'totl_emp_payrl', 'unemp_rate',
    # rates
    'fed_fund_rate', 'mortgage_rate',
    # cpi_*
    'cpi_idx', 'cpi_index_min_1', 'cpi_index_min_3', 'cpi_index_min_6', 'cpi_index_min_12',
    # export / import indices
    'export_idx', 'import_idx',
    # taxes*
    'taxes', 'taxes_min_1', 'taxes_min_3', 'taxes_min_6', 'taxes_min_12',
    # spending*
    'spending', 'spending_min_1', 'spending_min_3', 'spending_min_6', 'spending_min_12',
]

# Predictor matrix and regression target taken from the training frame.
X, y = empl_data[feature_cols], empl_data['job_growth']



In [19]:

    
# Fit the Lasso estimator on the training predictors X and target y.
# The value of this expression is displayed as the cell output (the
# estimator repr with its parameters, as recorded below).
regr.fit(X, y)









    Out[19]:





Lasso(alpha=0.3, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute='auto', tol=0.0001,
   warm_start=False)



In [24]:

    
# Score the fitted model on the same data it was trained on.
# For scikit-learn regressors, score() is documented to return R^2.
# This is an in-sample figure and says nothing about generalisation.
regr.score(X,y)









    Out[24]:





0.24025627272146721



In [20]:

    
# Display the fitted coefficients: one value per entry of feature_cols,
# in the same order as the columns of X.
# In the recorded output most entries are exactly zero and the non-zero
# ones are very small.
regr.coef_









    Out[20]:





array([  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   8.03984485e-03,   0.00000000e+00,
        -0.00000000e+00,   2.87458611e-04,   3.11837998e-05,
        -0.00000000e+00,   0.00000000e+00,  -0.00000000e+00,
        -0.00000000e+00,  -0.00000000e+00,  -0.00000000e+00,
        -0.00000000e+00,  -0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,  -4.54083370e-07,  -4.72029232e-07,
        -1.37079301e-07,  -7.45042452e-07,  -4.20317310e-07,
        -1.79906363e-06,  -1.35544634e-06,  -1.77981275e-07,
         5.65260857e-07,  -5.88612741e-07])



In [22]:

    
# Build the evaluation inputs from the test frame, using the same predictor
# columns and the same target column as the training set.
X_test, y_test = test_data[feature_cols], test_data['job_growth']



In [23]:

    
# Score the fitted model on the held-out test rows (R^2 for scikit-learn
# regressors).
# NOTE(review): the recorded result is about -21.97, against about 0.24 on
# the training data. A negative R^2 means the predictions are worse than
# predicting the mean of y_test, so this model does not generalise.
regr.score(X_test, y_test)









    Out[23]:





-21.966515138132593



In [ ]:

	job_growth	job_growth_per	job_growth_min_1	job_growth_min_3	job_growth_min_6	job_growth_min_12	bls_avg_weeks_unemp_per	avg_weeks_unemp	avg_weeks_unemp_min_1	avg_weeks_unemp_min_3	...	taxes_min_1	taxes_min_3	taxes_min_6	taxes_min_12	outlays	spending	spending_min_1	spending_min_3	spending_min_6	spending_min_12
0	0.085638	0.000856	-0.075590	0.175644	0.274477	0.050552	19.5	-6.8	-6.890756	-6.790756	...	19419	-59434	-61915	-104321	194111	-64514	-40315	-65111	-55050	-69916
1	0.099825	0.000998	0.085638	0.357112	0.012181	-0.064240	19.1	-7.2	-6.790756	-6.590756	...	5887	-61783	-58601	-63905	214660	-43965	-64514	-66908	-79764	-53288
2	0.191616	0.001916	0.099825	-0.075590	-0.061617	0.163955	19.5	-6.8	-7.190756	-6.890756	...	-95459	19419	11021	23761	220483	-38142	-43965	-40315	-75885	-56112
3	0.426579	0.004266	0.191616	0.085638	0.175644	0.124027	19.6	-6.7	-6.790756	-6.790756	...	-47571	5887	-59434	-80880	219690	-38935	-38142	-64514	-65111	-80712
4	0.251320	0.002513	0.426579	0.099825	0.357112	0.231902	18.6	-7.7	-6.690756	-7.190756	...	81284	-95459	-61783	18052	188920	-69705	-38935	-43965	-66908	-63367