notebook.community

Edit and run



In [2]:

    
import pandas as pd
from matplotlib import pyplot as plt

%matplotlib inline



In [3]:

    
# Read the training CSV from the fixtures directory (path is relative to the
# notebook's working directory).
empl_data = pd.read_csv(filepath_or_buffer='fixtures/training_data.csv')



In [4]:

    
# Report the (rows, columns) size of the training frame.
# Parenthesised print() produces the same text on Python 2 and is valid on Python 3,
# where the bare `print x` statement is a SyntaxError.
print(empl_data.shape)



In [5]:

    
# Preview the first five rows of the training frame as the cell's rich output.
empl_data.head(n=5)









    Out[5]:






  
    
      
      job_growth
      job_growth_min_1
      job_growth_min_3
      job_growth_min_6
      job_growth_min_12
      avg_weeks_unemp
      avg_weeks_unemp_min_1
      avg_weeks_unemp_min_3
      avg_weeks_unemp_min_6
      avg_weeks_unemp_min_12
      ...
      taxes
      taxes_min_1
      taxes_min_3
      taxes_min_6
      taxes_min_12
      spending
      spending_min_1
      spending_min_3
      spending_min_6
      spending_min_12
    
  
  
    
      0
       0.085638
      -0.075590
       0.175644
       0.274477
       0.050552
      -6.8
      -6.890756
      -6.790756
      -7.490756
      -6.190756
      ...
        5886.6
       19419
      -59434
      -61915
      -104321
      -64514
      -40315
      -65111
      -55050
      -69916
    
    
      1
       0.099825
       0.085638
       0.357112
       0.012181
      -0.064240
      -7.2
      -6.790756
      -6.590756
      -7.490756
      -6.490756
      ...
      -95459.4
        5887
      -61783
      -58601
       -63905
      -43965
      -64514
      -66908
      -79764
      -53288
    
    
      2
       0.191616
       0.099825
      -0.075590
      -0.061617
       0.163955
      -6.8
      -7.190756
      -6.890756
      -6.890756
      -6.690756
      ...
      -47571.4
      -95459
       19419
       11021
        23761
      -38142
      -43965
      -40315
      -75885
      -56112
    
    
      3
       0.426579
       0.191616
       0.085638
       0.175644
       0.124027
      -6.7
      -6.790756
      -6.790756
      -6.790756
      -6.490756
      ...
       81283.6
      -47571
        5887
      -59434
       -80880
      -38935
      -38142
      -64514
      -65111
      -80712
    
    
      4
       0.251320
       0.426579
       0.099825
       0.357112
       0.231902
      -7.7
      -6.690756
      -7.190756
      -6.590756
      -5.790756
      ...
      -43599.4
       81284
      -95459
      -61783
        18052
      -69705
      -38935
      -43965
      -66908
      -63367
    
  

5 rows × 34 columns



In [6]:

    
# Scatter job_growth against each CPI column, one panel per column, sharing the
# y-axis so the panels are directly comparable.
# (The *_min_3 / *_min_12 suffixes presumably denote 3- and 12-period lags -- confirm
# against the data dictionary.)
cpi_columns = ['cpi_idx', 'cpi_index_min_3', 'cpi_index_min_12']

# Size the figure once at creation instead of re-applying figsize on every plot call.
fig, axs = plt.subplots(1, len(cpi_columns), sharey=True, figsize=(16, 8))

for ax, cpi_col in zip(axs, cpi_columns):
    empl_data.plot(kind='scatter', x=cpi_col, y='job_growth', ax=ax)
    # Title each panel so the figure stands alone when skimmed.
    ax.set_title('job_growth vs ' + cpi_col)

# Render the figure explicitly; the cell no longer ends on an Axes expression, so
# no "<matplotlib.axes...AxesSubplot at 0x...>" repr is emitted as output.
plt.show()









    Out[6]:





<matplotlib.axes._subplots.AxesSubplot at 0x10a020e90>



In [7]:

    
# scikit-learn workflow, steps 1 and 2: import the estimator class and create an
# unfitted instance. Fitting happens in a later cell once X and y are defined.
from sklearn.linear_model import LinearRegression

# fit_intercept=True is the library default, spelled out here for the reader.
slm = LinearRegression(fit_intercept=True)



In [8]:

    
# List every column name in the training frame as a plain Python list.
empl_data.columns.tolist()









    Out[8]:





['job_growth',
 'job_growth_min_1',
 'job_growth_min_3',
 'job_growth_min_6',
 'job_growth_min_12',
 'avg_weeks_unemp',
 'avg_weeks_unemp_min_1',
 'avg_weeks_unemp_min_3',
 'avg_weeks_unemp_min_6',
 'avg_weeks_unemp_min_12',
 'emp_pop_ratio',
 'lbr_frc_prtcp',
 'not_in_lbr_frc',
 'totl_emp_payrl',
 'unemp_rate',
 'fed_fund_rate',
 'mortgage_rate',
 'import_idx',
 'export_idx',
 'cpi_idx',
 'cpi_index_min_1',
 'cpi_index_min_3',
 'cpi_index_min_6',
 'cpi_index_min_12',
 'taxes',
 'taxes_min_1',
 'taxes_min_3',
 'taxes_min_6',
 'taxes_min_12',
 'spending',
 'spending_min_1',
 'spending_min_3',
 'spending_min_6',
 'spending_min_12']



In [9]:

    
# Predictor columns for the scikit-learn model, grouped by feature family.
# The order below is the column order of X (and therefore of the fitted coefficients).
# Note: the avg_weeks_unemp_min_* columns present in the frame are not part of this list.
feature_cols = [
    # previous job-growth values
    'job_growth_min_1', 'job_growth_min_3', 'job_growth_min_6', 'job_growth_min_12',
    # labour-market indicators
    'avg_weeks_unemp', 'emp_pop_ratio', 'lbr_frc_prtcp', 'not_in_lbr_frc',
    'totl_emp_payrl', 'unemp_rate',
    # interest rates
    'fed_fund_rate', 'mortgage_rate',
    # consumer price index columns
    'cpi_idx', 'cpi_index_min_1', 'cpi_index_min_3', 'cpi_index_min_6',
    'cpi_index_min_12',
    # trade indices
    'export_idx', 'import_idx',
    # taxes
    'taxes', 'taxes_min_1', 'taxes_min_3', 'taxes_min_6', 'taxes_min_12',
    # spending
    'spending', 'spending_min_1', 'spending_min_3', 'spending_min_6',
    'spending_min_12',
]

# Design matrix (all predictor columns) and target vector (job_growth).
X = empl_data.loc[:, feature_cols]
y = empl_data.loc[:, 'job_growth']



In [16]:

    
# Fit the linear model on the training predictors and target; the fitted
# estimator is returned and shown as the cell output.
slm.fit(X=X, y=y)









    Out[16]:





LinearRegression(copy_X=True, fit_intercept=True, normalize=False)



In [17]:

    
# Score the fitted model on the same data it was trained on
# (LinearRegression.score reports the coefficient of determination, R^2).
slm.score(X=X, y=y)









    Out[17]:





0.5590431502011759



In [18]:

    
# Read the held-out test CSV from the fixtures directory (path is relative to the
# notebook's working directory).
test_data = pd.read_csv(filepath_or_buffer='fixtures/test_data.csv')



In [19]:

    
# Build the test design matrix and target using the same predictor list and
# target column as the training cell.
X_test = test_data.loc[:, feature_cols]
y_test = test_data.loc[:, 'job_growth']



In [20]:

    
# Score the fitted model on the held-out test data (R^2; a negative value means
# the model does worse than always predicting the mean of y_test).
slm.score(X=X_test, y=y_test)









    Out[20]:





-68.484538755862502



In [ ]:

	job_growth	job_growth_min_1	job_growth_min_3	job_growth_min_6	job_growth_min_12	avg_weeks_unemp	avg_weeks_unemp_min_1	avg_weeks_unemp_min_3	avg_weeks_unemp_min_6	avg_weeks_unemp_min_12	...	taxes	taxes_min_1	taxes_min_3	taxes_min_6	taxes_min_12	spending	spending_min_1	spending_min_3	spending_min_6	spending_min_12
0	0.085638	-0.075590	0.175644	0.274477	0.050552	-6.8	-6.890756	-6.790756	-7.490756	-6.190756	...	5886.6	19419	-59434	-61915	-104321	-64514	-40315	-65111	-55050	-69916
1	0.099825	0.085638	0.357112	0.012181	-0.064240	-7.2	-6.790756	-6.590756	-7.490756	-6.490756	...	-95459.4	5887	-61783	-58601	-63905	-43965	-64514	-66908	-79764	-53288
2	0.191616	0.099825	-0.075590	-0.061617	0.163955	-6.8	-7.190756	-6.890756	-6.890756	-6.690756	...	-47571.4	-95459	19419	11021	23761	-38142	-43965	-40315	-75885	-56112
3	0.426579	0.191616	0.085638	0.175644	0.124027	-6.7	-6.790756	-6.790756	-6.790756	-6.490756	...	81283.6	-47571	5887	-59434	-80880	-38935	-38142	-64514	-65111	-80712
4	0.251320	0.426579	0.099825	0.357112	0.231902	-7.7	-6.690756	-7.190756	-6.590756	-5.790756	...	-43599.4	81284	-95459	-61783	18052	-69705	-38935	-43965	-66908	-63367