In [2]:
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
In [3]:
# Load the training set into a DataFrame. The path is relative, so the
# notebook must be run from the directory that contains `fixtures/`.
empl_data = pd.read_csv('fixtures/training_data.csv')
In [4]:
# Show (n_rows, n_columns) of the training frame.
# Use the call form of print: the bare `print x` statement is a SyntaxError
# on Python 3, while print(x) with a single argument prints the same text on
# both Python 2 and Python 3.
print(empl_data.shape)
In [5]:
# Preview the first rows of the training data (head() shows 5 rows by default)
# as a quick sanity check that the CSV parsed into the expected columns.
empl_data.head()
Out[5]:
In [6]:
# Scatter `job_growth` against each CPI column, one panel per column, with a
# shared y-axis so the panels are directly comparable.
# The figure size is set once when the figure is created rather than being
# re-applied by every plot call, and the panels are drawn in a loop so adding
# or removing a column is a one-line change.
cpi_cols = ['cpi_idx', 'cpi_index_min_3', 'cpi_index_min_12']
fig, axs = plt.subplots(1, len(cpi_cols), sharey=True, figsize=(16, 8))
for ax, col in zip(axs, cpi_cols):
    empl_data.plot(kind='scatter', x=col, y='job_growth', ax=ax)
    # Title each panel so the figure can be read on its own.
    ax.set_title('job_growth vs %s' % col)
Out[6]:
In [7]:
# Usual scikit-learn pattern: import the estimator class and instantiate it.
# No arguments are passed, so the estimator uses its default settings.
# The model is only created here; fitting happens in a later cell.
from sklearn.linear_model import LinearRegression
slm = LinearRegression()
In [8]:
# Display every column name as a plain Python list, to pick features from.
empl_data.columns.tolist()
Out[8]:
In [9]:
# Predictor columns fed to the scikit-learn model, grouped by theme.
# The order of this list is the column order of the feature matrix `X`.
feature_cols = [
    # job-growth columns
    'job_growth_min_1', 'job_growth_min_3',
    'job_growth_min_6', 'job_growth_min_12',
    # labour-market columns
    'avg_weeks_unemp', 'emp_pop_ratio', 'lbr_frc_prtcp',
    'not_in_lbr_frc', 'totl_emp_payrl', 'unemp_rate',
    # interest-rate columns
    'fed_fund_rate', 'mortgage_rate',
    # CPI columns
    'cpi_idx', 'cpi_index_min_1', 'cpi_index_min_3',
    'cpi_index_min_6', 'cpi_index_min_12',
    # trade-index columns
    'export_idx', 'import_idx',
    # tax columns
    'taxes', 'taxes_min_1', 'taxes_min_3',
    'taxes_min_6', 'taxes_min_12',
    # spending columns
    'spending', 'spending_min_1', 'spending_min_3',
    'spending_min_6', 'spending_min_12',
]

# Target series first, then the feature matrix restricted to the columns above.
y = empl_data['job_growth']
X = empl_data[feature_cols]
In [16]:
# Fit the linear regression on the training features `X` and target `y`.
# fit() returns the estimator itself, which is what the cell displays.
slm.fit(X, y)
Out[16]:
In [17]:
# Score the model on the same data it was fit on. For LinearRegression,
# score() returns the coefficient of determination (R^2).
# NOTE: this is an in-sample figure and will be optimistic; the held-out
# score on the test set computed further down is the one to trust.
slm.score(X,y)
Out[17]:
In [18]:
# Load the held-out test set from the same fixtures directory as the
# training data (relative path, resolved against the working directory).
test_data = pd.read_csv('fixtures/test_data.csv')
In [19]:
# Build the test feature matrix and target with the same `feature_cols` list
# used for training, so the columns and their order match the fitted model.
X_test = test_data[feature_cols]
y_test = test_data['job_growth']
In [20]:
# R^2 of the fitted model on the held-out test data; compare against the
# training-set score to gauge how well the fit generalises.
slm.score(X_test,y_test)
Out[20]:
In [ ]: