In [2]:
import pandas as pd
from matplotlib import pyplot as plt

%matplotlib inline

In [3]:
# Load the training set from a CSV path relative to the notebook's working directory.
empl_data = pd.read_csv('fixtures/training_data.csv')

In [4]:
# Sanity-check the size of the training frame as (rows, columns).
# The call form of print with a single argument produces the same output on
# Python 2 and Python 3; the bare print statement is a SyntaxError on Python 3.
print(empl_data.shape)


(108, 34)

In [5]:
# Preview the first rows of the training frame (rich display as the cell's last expression).
empl_data.head()


Out[5]:
job_growth job_growth_min_1 job_growth_min_3 job_growth_min_6 job_growth_min_12 avg_weeks_unemp avg_weeks_unemp_min_1 avg_weeks_unemp_min_3 avg_weeks_unemp_min_6 avg_weeks_unemp_min_12 ... taxes taxes_min_1 taxes_min_3 taxes_min_6 taxes_min_12 spending spending_min_1 spending_min_3 spending_min_6 spending_min_12
0 0.085638 -0.075590 0.175644 0.274477 0.050552 -6.8 -6.890756 -6.790756 -7.490756 -6.190756 ... 5886.6 19419 -59434 -61915 -104321 -64514 -40315 -65111 -55050 -69916
1 0.099825 0.085638 0.357112 0.012181 -0.064240 -7.2 -6.790756 -6.590756 -7.490756 -6.490756 ... -95459.4 5887 -61783 -58601 -63905 -43965 -64514 -66908 -79764 -53288
2 0.191616 0.099825 -0.075590 -0.061617 0.163955 -6.8 -7.190756 -6.890756 -6.890756 -6.690756 ... -47571.4 -95459 19419 11021 23761 -38142 -43965 -40315 -75885 -56112
3 0.426579 0.191616 0.085638 0.175644 0.124027 -6.7 -6.790756 -6.790756 -6.790756 -6.490756 ... 81283.6 -47571 5887 -59434 -80880 -38935 -38142 -64514 -65111 -80712
4 0.251320 0.426579 0.099825 0.357112 0.231902 -7.7 -6.690756 -7.190756 -6.590756 -5.790756 ... -43599.4 81284 -95459 -61783 18052 -69705 -38935 -43965 -66908 -63367

5 rows × 34 columns


In [6]:
# Scatter job_growth against three CPI columns, one panel per column:
# 'cpi_idx' plus 'cpi_index_min_3' and 'cpi_index_min_12' (presumably the
# 3- and 12-period lags of the index -- confirm against the data source).
# sharey=True gives all three panels the same job_growth axis.
# The figure size is set once when the figure is created instead of being
# re-applied by every pandas plot call on already-existing axes.
fig, axs = plt.subplots(1, 3, sharey=True, figsize=(16, 8))
empl_data.plot(kind='scatter', x='cpi_idx', y='job_growth', ax=axs[0])
empl_data.plot(kind='scatter', x='cpi_index_min_3', y='job_growth', ax=axs[1])
empl_data.plot(kind='scatter', x='cpi_index_min_12', y='job_growth', ax=axs[2])


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a020e90>

In [7]:
# follow the usual sklearn pattern: import, instantiate, fit
# (this cell does the import and the instantiation; the fit call is in a later cell)
from sklearn.linear_model import LinearRegression
# Linear regression estimator created with its default constructor arguments.
slm = LinearRegression()

In [8]:
# Show every column name as a plain Python list so feature names can be copied.
empl_data.columns.tolist()


Out[8]:
['job_growth',
 'job_growth_min_1',
 'job_growth_min_3',
 'job_growth_min_6',
 'job_growth_min_12',
 'avg_weeks_unemp',
 'avg_weeks_unemp_min_1',
 'avg_weeks_unemp_min_3',
 'avg_weeks_unemp_min_6',
 'avg_weeks_unemp_min_12',
 'emp_pop_ratio',
 'lbr_frc_prtcp',
 'not_in_lbr_frc',
 'totl_emp_payrl',
 'unemp_rate',
 'fed_fund_rate',
 'mortgage_rate',
 'import_idx',
 'export_idx',
 'cpi_idx',
 'cpi_index_min_1',
 'cpi_index_min_3',
 'cpi_index_min_6',
 'cpi_index_min_12',
 'taxes',
 'taxes_min_1',
 'taxes_min_3',
 'taxes_min_6',
 'taxes_min_12',
 'spending',
 'spending_min_1',
 'spending_min_3',
 'spending_min_6',
 'spending_min_12']

In [9]:
# Predictor columns handed to scikit-learn, grouped by indicator family.
# The order is significant: it fixes the column order of the design matrix.
feature_cols = [
    # lagged values of the target
    'job_growth_min_1', 'job_growth_min_3', 'job_growth_min_6', 'job_growth_min_12',
    # labour-market indicators
    'avg_weeks_unemp', 'emp_pop_ratio', 'lbr_frc_prtcp', 'not_in_lbr_frc',
    'totl_emp_payrl', 'unemp_rate',
    # interest rates
    'fed_fund_rate', 'mortgage_rate',
    # consumer price index and its lags
    'cpi_idx', 'cpi_index_min_1', 'cpi_index_min_3', 'cpi_index_min_6', 'cpi_index_min_12',
    # trade indices
    'export_idx', 'import_idx',
    # taxes and their lags
    'taxes', 'taxes_min_1', 'taxes_min_3', 'taxes_min_6', 'taxes_min_12',
    # spending and its lags
    'spending', 'spending_min_1', 'spending_min_3', 'spending_min_6', 'spending_min_12',
]
# Design matrix: the training frame restricted to the selected predictor columns.
X = empl_data[feature_cols]
# Regression target: the 'job_growth' column of the training frame.
y = empl_data['job_growth']

In [16]:
# Fit the linear model on the training predictors and target.
slm.fit(X, y)


Out[16]:
LinearRegression(copy_X=True, fit_intercept=True, normalize=False)

In [17]:
# Score the model on the same data it was fitted on (R^2 for a scikit-learn
# regressor), so this is an in-sample figure, not a measure of generalisation.
slm.score(X,y)


Out[17]:
0.5590431502011759

In [18]:
# Load the separate test set from a CSV path relative to the working directory.
test_data = pd.read_csv('fixtures/test_data.csv')

In [19]:
# Select the same predictor columns, in the same order, as were used for fitting.
X_test = test_data[feature_cols]
# Test-set target values.
y_test = test_data['job_growth']

In [20]:
# Score the fitted model on the test set (R^2). R^2 can be negative: a value
# below 0 means the predictions are worse than always predicting the mean of
# y_test. The recorded run shows a strongly negative score here versus about
# 0.56 in-sample, so the model as fitted does not generalise to the test data.
slm.score(X_test,y_test)


Out[20]:
-68.484538755862502

In [ ]: