In [49]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [53]:
hrs=pd.read_csv('hrs.csv')
wage=pd.read_csv('wage.csv')
hc=pd.read_csv('hc.csv')
wage['total']=wage.ass_manager+wage.ass_ft+wage.ass_pt+wage.other

In [20]:
hrs.head()


Out[20]:
store mon year band Hrs NormalOT HolidayOT
0 101 1 2017 G1 14306.42 14.91 504.03
1 101 1 2017 G2 1482.83 4.96 63.22
2 101 1 2017 IH 420.39 0.00 7.07
3 101 1 2017 PT 16275.22 0.00 252.23
4 101 1 2017 S1 5235.48 20.10 111.51

In [21]:
wage.head()


Out[21]:
store manager ass_manager ass_ft ass_pt other ot mon year total
0 101 110179.69 169249.58 443494.03 455144.50 11276.12 59635.06 1 2018 1079164.23
1 102 70771.51 117890.77 324962.74 22084.38 7719.30 33407.27 1 2018 472657.19
2 104 82271.18 118125.09 277736.37 194475.94 11813.19 45762.36 1 2018 602150.59
3 105 82919.15 122245.71 230854.12 131086.92 4603.50 39893.99 1 2018 488790.25
4 106 56640.02 121773.72 236403.47 143285.55 10323.37 44008.42 1 2018 511786.11

In [54]:
hc.head()


Out[54]:
store band cnt
0 101 G1 131
1 101 G2 7
2 101 IH 95
3 101 M1 10
4 101 M2 1

In [85]:
models={}
tmp={}
rate=[]
store_nbr=[]
store_list=hrs.store.unique()
for sto in store_list:
    dt_hrs=hrs.set_index(['store','mon','year','band'])['Hrs'].unstack(-1).reset_index().fillna(0)
    X=dt_hrs[dt_hrs.store==sto]
    Y=wage[wage.store==sto]
    df=pd.merge(X,Y,on=['store','mon','year'])
    df_filter=df[df['year']==2018]
    df_filter['hrs']=df_filter.IH+df_filter.PT
    trainX,testX,trainY,testY=train_test_split(df_filter['hrs'].fillna(0).reshape(-1,1),df_filter.ass_pt,test_size=0.15)
    rgs=LinearRegression(fit_intercept=False)
    model=rgs.fit(trainX,trainY)
    store_nbr.append(sto)
    coef=model.coef_
    rate.append(coef[0])
    models[str(sto)]=model
tmp['store']=store_nbr
tmp['rate']=rate
result_pt=pd.DataFrame(tmp)


c:\program files\python36\lib\site-packages\ipykernel_launcher.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
c:\program files\python36\lib\site-packages\ipykernel_launcher.py:13: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead
  del sys.path[0]

In [86]:
models={}
tmp={}
rate=[]
store_nbr=[]
store_list=hrs.store.unique()
for sto in store_list:
    dt_hrs=hrs.set_index(['store','mon','year','band'])['Hrs'].unstack(-1).reset_index().fillna(0)
    X=dt_hrs[dt_hrs.store==sto]
    Y=wage[wage.store==sto]
    df=pd.merge(X,Y,on=['store','mon','year'])
    df_filter=df[df['year']==2018]
    df_filter['hrs']=df_filter.G1+df_filter.G2+df_filter.T2+df_filter.T1
    trainX,testX,trainY,testY=train_test_split(df_filter['hrs'].fillna(0).reshape(-1,1),df_filter.ass_ft,test_size=0.15)
    rgs=LinearRegression(fit_intercept=False)
    model=rgs.fit(trainX,trainY)
    store_nbr.append(sto)
    coef=model.coef_
    rate.append(coef[0])
    models[str(sto)]=model
tmp['store']=store_nbr
tmp['rate']=rate
result_ft=pd.DataFrame(tmp)


c:\program files\python36\lib\site-packages\ipykernel_launcher.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
c:\program files\python36\lib\site-packages\ipykernel_launcher.py:13: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead
  del sys.path[0]

In [89]:
result_pt.to_csv('pt_rate.csv',index=False)

In [90]:
result_ft.to_csv('ft_rate.csv',index=False)

In [87]:
result_pt


Out[87]:
rate store
0 15.877838 101
1 16.576738 104
2 16.003401 105
3 17.037550 106
4 17.775601 107
5 15.938331 108
6 12.896446 110
7 14.323024 111
8 17.590250 113
9 8.112399 114
10 14.460210 117
11 13.241298 118
12 16.756465 119
13 16.844309 120
14 18.661229 121
15 11.567659 122
16 14.203868 124
17 15.656157 125
18 13.099760 126
19 10.139139 127
20 13.547861 128
21 14.282380 201
22 11.406030 205
23 12.639736 206
24 11.875336 207
25 11.815165 208
26 13.654704 209
27 9.759217 210
28 10.869927 211
29 13.085392 212
... ... ...
367 20.652382 2744
368 17.272054 2746
369 17.528679 2752
370 12.515928 3400
371 11.052366 3401
372 16.762036 3402
373 17.751687 3403
374 17.691124 3404
375 12.225472 3407
376 17.735890 3409
377 15.657366 3410
378 16.413689 3413
379 12.798169 3414
380 12.199994 3416
381 15.601778 3417
382 16.158975 3420
383 13.871869 3421
384 21.409315 3423
385 15.838301 3424
386 14.554797 3425
387 17.454594 3432
388 15.948677 5744
389 17.422993 5745
390 13.824270 5746
391 14.706206 5780
392 13.238790 5782
393 12.161423 5823
394 15.322431 5824
395 13.030020 5835
396 20.864847 5845

397 rows × 2 columns


In [88]:
result_ft


Out[88]:
rate store
0 16.354197 101
1 16.701703 104
2 16.851804 105
3 15.747170 106
4 17.331890 107
5 15.838320 108
6 16.990434 110
7 17.158527 111
8 18.730886 113
9 12.392901 114
10 16.308718 117
11 14.490640 118
12 17.751333 119
13 16.420790 120
14 16.359724 121
15 14.714008 122
16 17.466344 124
17 15.750361 125
18 15.271845 126
19 14.412811 127
20 15.336411 128
21 13.946976 201
22 11.946425 205
23 14.314081 206
24 12.834378 207
25 13.357523 208
26 14.441515 209
27 13.176369 210
28 12.778409 211
29 13.424073 212
... ... ...
367 16.353572 2744
368 13.265568 2746
369 13.375541 2752
370 17.046912 3400
371 13.964763 3401
372 19.782887 3402
373 21.055597 3403
374 20.157046 3404
375 15.523184 3407
376 20.449451 3409
377 14.645976 3410
378 14.651468 3413
379 15.711609 3414
380 13.781229 3416
381 14.713931 3417
382 15.187553 3420
383 16.000985 3421
384 19.041351 3423
385 18.622666 3424
386 13.687687 3425
387 18.295387 3432
388 14.643461 5744
389 15.009324 5745
390 12.193194 5746
391 15.065083 5780
392 13.695231 5782
393 17.958823 5823
394 17.786327 5824
395 13.365845 5835
396 18.599340 5845

397 rows × 2 columns


In [ ]: