In [2]:
import pandas as pd
from matplotlib import pyplot as plt

%matplotlib inline

In [13]:
empl_data = pd.read_csv('fixtures/training_data.csv')
test_data = pd.read_csv('fixtures/test_data.csv')

In [15]:
print empl_data.shape
print test_data.shape


(108, 34)
(12, 34)

In [5]:
empl_data.head()


Out[5]:
job_growth job_growth_per job_growth_min_1 job_growth_min_3 job_growth_min_6 job_growth_min_12 bls_avg_weeks_unemp_per avg_weeks_unemp avg_weeks_unemp_min_1 avg_weeks_unemp_min_3 ... taxes_min_1 taxes_min_3 taxes_min_6 taxes_min_12 outlays spending spending_min_1 spending_min_3 spending_min_6 spending_min_12
0 0.085638 0.000856 -0.075590 0.175644 0.274477 0.050552 19.5 -6.8 -6.890756 -6.790756 ... 19419 -59434 -61915 -104321 194111 -64514 -40315 -65111 -55050 -69916
1 0.099825 0.000998 0.085638 0.357112 0.012181 -0.064240 19.1 -7.2 -6.790756 -6.590756 ... 5887 -61783 -58601 -63905 214660 -43965 -64514 -66908 -79764 -53288
2 0.191616 0.001916 0.099825 -0.075590 -0.061617 0.163955 19.5 -6.8 -7.190756 -6.890756 ... -95459 19419 11021 23761 220483 -38142 -43965 -40315 -75885 -56112
3 0.426579 0.004266 0.191616 0.085638 0.175644 0.124027 19.6 -6.7 -6.790756 -6.790756 ... -47571 5887 -59434 -80880 219690 -38935 -38142 -64514 -65111 -80712
4 0.251320 0.002513 0.426579 0.099825 0.357112 0.231902 18.6 -7.7 -6.690756 -7.190756 ... 81284 -95459 -61783 18052 188920 -69705 -38935 -43965 -66908 -63367

5 rows × 48 columns


In [6]:
fig, axs = plt.subplots(1, 3, sharey=True)
empl_data.plot(kind='scatter', x='cpi_idx', y='job_growth', ax=axs[0], figsize=(16,8))
empl_data.plot(kind='scatter', x='cpi_index_min_3', y='job_growth', ax=axs[1], figsize=(16,8))
empl_data.plot(kind='scatter', x='cpi_index_min_12', y='job_growth', ax=axs[2], figsize=(16,8))


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x10bb63950>

In [16]:
# follow the usual sklearn pattern: import, instantiate, fit
from sklearn import linear_model
regr = linear_model.Lasso(alpha=.3)

In [18]:
# using scikit-learn
feature_cols = ['job_growth_min_1',
 'job_growth_min_3',
 'job_growth_min_6',
 'job_growth_min_12',
 'avg_weeks_unemp',
 'emp_pop_ratio',
 'lbr_frc_prtcp',
 'not_in_lbr_frc',
 'totl_emp_payrl',
 'unemp_rate',
 'fed_fund_rate',
 'mortgage_rate',
 'cpi_idx',
 'cpi_index_min_1',
 'cpi_index_min_3',
 'cpi_index_min_6',
 'cpi_index_min_12',
 'export_idx',
 'import_idx',
 'taxes',
 'taxes_min_1',
 'taxes_min_3',
 'taxes_min_6',
 'taxes_min_12',
 'spending',
 'spending_min_1',
 'spending_min_3',
 'spending_min_6',
 'spending_min_12']
X = empl_data[feature_cols]
y = empl_data['job_growth']

In [19]:
regr.fit(X, y)


Out[19]:
Lasso(alpha=0.3, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute='auto', tol=0.0001,
   warm_start=False)

In [24]:
regr.score(X,y)


Out[24]:
0.24025627272146721

In [20]:
regr.coef_


Out[20]:
array([  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   8.03984485e-03,   0.00000000e+00,
        -0.00000000e+00,   2.87458611e-04,   3.11837998e-05,
        -0.00000000e+00,   0.00000000e+00,  -0.00000000e+00,
        -0.00000000e+00,  -0.00000000e+00,  -0.00000000e+00,
        -0.00000000e+00,  -0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,  -4.54083370e-07,  -4.72029232e-07,
        -1.37079301e-07,  -7.45042452e-07,  -4.20317310e-07,
        -1.79906363e-06,  -1.35544634e-06,  -1.77981275e-07,
         5.65260857e-07,  -5.88612741e-07])

In [22]:
X_test = test_data[feature_cols]
y_test = test_data['job_growth']

In [23]:
regr.score(X_test, y_test)


Out[23]:
-21.966515138132593

In [ ]: