In [2]:
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
In [13]:
# Read the training set and the hold-out test set from the local fixtures
# directory (training file first, then the test file).
empl_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('fixtures/training_data.csv', 'fixtures/test_data.csv')
)
In [15]:
# Sanity check on the load: show (rows, columns) for each dataset.
# print() is written in call form so the cell runs on both Python 2 and
# Python 3; with a single argument the output is identical on either.
print(empl_data.shape)
print(test_data.shape)
In [5]:
# Preview the first rows of the training set (pandas' head() defaults to 5 rows).
empl_data.head()
Out[5]:
In [6]:
# Scatter job_growth against the current CPI column and two of its `_min_N`
# variants (presumably lagged values -- confirm against the data source),
# one panel per column. sharey=True keeps the y-axis identical across panels
# so they can be compared directly.
cpi_cols = ['cpi_idx', 'cpi_index_min_3', 'cpi_index_min_12']

# The figure size belongs to the figure, so set it once at creation instead of
# passing it to every per-axes plot call.
fig, axs = plt.subplots(1, len(cpi_cols), sharey=True, figsize=(16, 8))

for ax, cpi_col in zip(axs, cpi_cols):
    empl_data.plot(kind='scatter', x=cpi_col, y='job_growth', ax=ax)
Out[6]:
In [16]:
# Usual scikit-learn workflow: import the estimator module, instantiate the
# model here, and fit it in a later cell.
from sklearn import linear_model

# Lasso = linear regression with an L1 penalty; alpha is the penalty weight.
regr = linear_model.Lasso(alpha=0.3)
In [18]:
# Inputs for the scikit-learn model.
#
# feature_cols lists the predictor columns; its order fixes the column order
# of X. Names ending in `_min_N` are presumably the same series N periods
# earlier -- confirm against the data source. The group headings below are
# inferred from the column names only.
feature_cols = [
    # job growth (lagged)
    'job_growth_min_1', 'job_growth_min_3', 'job_growth_min_6', 'job_growth_min_12',
    # labour market
    'avg_weeks_unemp', 'emp_pop_ratio', 'lbr_frc_prtcp', 'not_in_lbr_frc',
    'totl_emp_payrl', 'unemp_rate',
    # interest rates
    'fed_fund_rate', 'mortgage_rate',
    # consumer prices (note: the lagged columns are spelled `cpi_index_`, not `cpi_idx_`)
    'cpi_idx', 'cpi_index_min_1', 'cpi_index_min_3', 'cpi_index_min_6', 'cpi_index_min_12',
    # trade
    'export_idx', 'import_idx',
    # taxes
    'taxes', 'taxes_min_1', 'taxes_min_3', 'taxes_min_6', 'taxes_min_12',
    # spending
    'spending', 'spending_min_1', 'spending_min_3', 'spending_min_6', 'spending_min_12',
]

# Target series and predictor matrix, both taken from the training data.
y = empl_data['job_growth']
X = empl_data[feature_cols]
In [19]:
# Fit the estimator on the training predictors X and target y.
regr.fit(X, y)
Out[19]:
In [24]:
# In-sample score (R^2 for scikit-learn regressors). It is computed on the same
# rows the model was fitted on, so treat it as an optimistic upper bound.
regr.score(X, y)
Out[24]:
In [20]:
# Fitted coefficients, one per column of X (i.e. in feature_cols order).
# With an L1 penalty some of these may be exactly zero.
regr.coef_
Out[20]:
In [22]:
# Hold-out evaluation data: the same predictor columns and the same target
# column as the training split, taken from test_data.
X_test, y_test = test_data[feature_cols], test_data['job_growth']
In [23]:
# Out-of-sample score (R^2 for scikit-learn regressors) on the hold-out test data.
regr.score(X_test, y_test)
Out[23]:
In [ ]: