In [49]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
In [53]:
# Read the three input tables from CSV files in the working directory.
hrs = pd.read_csv('hrs.csv')
hc = pd.read_csv('hc.csv')
# `total` is the row-wise sum of the four wage components; a missing value in
# any component propagates to `total` (plain `+`, no NaN skipping).
wage = pd.read_csv('wage.csv').assign(
    total=lambda w: w['ass_manager'] + w['ass_ft'] + w['ass_pt'] + w['other']
)
In [20]:
# Preview the first rows of the hours table read from hrs.csv.
hrs.head()
Out[20]:
In [21]:
# Preview the first rows of the wage table (including the derived `total` column).
wage.head()
Out[21]:
In [54]:
# Preview the first rows of the table read from hc.csv.
hc.head()
Out[54]:
In [85]:
# Per-store rate for the `ass_pt` wage column, 2018 only.
#
# For every store, fit ass_pt ~ rate * (IH + PT hours) with a linear model that
# has no intercept, so the single fitted coefficient is the implied wage per
# hour. Outputs:
#   result_pt : DataFrame with one row per store (columns `store`, `rate`)
#   models    : dict mapping str(store) -> fitted LinearRegression
#
# NOTE(review): the cell that builds `result_ft` repeats this logic with other
# columns; consider extracting a shared helper function.

# Pivot the hours table once: one row per (store, mon, year) and one column per
# `band` value, with missing bands filled with 0. The pivot does not depend on
# the store, so it is built outside the loop instead of once per store.
dt_hrs = (
    hrs.set_index(['store', 'mon', 'year', 'band'])['Hrs']
    .unstack(-1)
    .reset_index()
    .fillna(0)
)

models = {}
rate = []
store_nbr = []
store_list = hrs.store.unique()
for sto in store_list:
    X = dt_hrs[dt_hrs.store == sto]
    Y = wage[wage.store == sto]
    df = pd.merge(X, Y, on=['store', 'mon', 'year'])
    # .copy() gives an independent frame, so adding the `hrs` column below does
    # not write onto a slice of `df` (avoids SettingWithCopyWarning).
    df_filter = df[df['year'] == 2018].copy()
    df_filter['hrs'] = df_filter.IH + df_filter.PT
    # scikit-learn needs a 2-D feature matrix. Series.reshape was removed from
    # pandas, so reshape the underlying ndarray instead.
    features = df_filter['hrs'].fillna(0).values.reshape(-1, 1)
    # Fixed random_state: the same rows are held out on every run, so the
    # exported rates are reproducible. The held-out 15% is not evaluated here.
    trainX, testX, trainY, testY = train_test_split(
        features, df_filter.ass_pt, test_size=0.15, random_state=0
    )
    # fit_intercept=False forces the line through the origin (0 hours -> 0 wage).
    model = LinearRegression(fit_intercept=False).fit(trainX, trainY)
    store_nbr.append(sto)
    rate.append(model.coef_[0])
    models[str(sto)] = model

# Assemble the result once, after every store has been fitted.
tmp = {'store': store_nbr, 'rate': rate}
result_pt = pd.DataFrame(tmp)
In [86]:
# Per-store rate for the `ass_ft` wage column, 2018 only.
#
# For every store, fit ass_ft ~ rate * (G1 + G2 + T2 + T1 hours) with a linear
# model that has no intercept, so the single fitted coefficient is the implied
# wage per hour. Outputs:
#   result_ft : DataFrame with one row per store (columns `store`, `rate`)
#   models    : dict mapping str(store) -> fitted LinearRegression
#
# NOTE(review): the cell that builds `result_pt` repeats this logic with other
# columns; consider extracting a shared helper function.

# Pivot the hours table once: one row per (store, mon, year) and one column per
# `band` value, with missing bands filled with 0. The pivot does not depend on
# the store, so it is built outside the loop instead of once per store.
dt_hrs = (
    hrs.set_index(['store', 'mon', 'year', 'band'])['Hrs']
    .unstack(-1)
    .reset_index()
    .fillna(0)
)

models = {}
rate = []
store_nbr = []
store_list = hrs.store.unique()
for sto in store_list:
    X = dt_hrs[dt_hrs.store == sto]
    Y = wage[wage.store == sto]
    df = pd.merge(X, Y, on=['store', 'mon', 'year'])
    # .copy() gives an independent frame, so adding the `hrs` column below does
    # not write onto a slice of `df` (avoids SettingWithCopyWarning).
    df_filter = df[df['year'] == 2018].copy()
    df_filter['hrs'] = df_filter.G1 + df_filter.G2 + df_filter.T2 + df_filter.T1
    # scikit-learn needs a 2-D feature matrix. Series.reshape was removed from
    # pandas, so reshape the underlying ndarray instead.
    features = df_filter['hrs'].fillna(0).values.reshape(-1, 1)
    # Fixed random_state: the same rows are held out on every run, so the
    # exported rates are reproducible. The held-out 15% is not evaluated here.
    trainX, testX, trainY, testY = train_test_split(
        features, df_filter.ass_ft, test_size=0.15, random_state=0
    )
    # fit_intercept=False forces the line through the origin (0 hours -> 0 wage).
    model = LinearRegression(fit_intercept=False).fit(trainX, trainY)
    store_nbr.append(sto)
    rate.append(model.coef_[0])
    models[str(sto)] = model

# Assemble the result once, after every store has been fitted.
tmp = {'store': store_nbr, 'rate': rate}
result_ft = pd.DataFrame(tmp)
In [89]:
# Write the per-store rates fitted against `ass_pt` to disk, without the row index.
result_pt.to_csv(path_or_buf='pt_rate.csv', index=False)
In [90]:
# Write the per-store rates fitted against `ass_ft` to disk, without the row index.
result_ft.to_csv(path_or_buf='ft_rate.csv', index=False)
In [87]:
# Display the per-store rates fitted against `ass_pt` (one row per store).
result_pt
Out[87]:
In [88]:
# Display the per-store rates fitted against `ass_ft` (one row per store).
result_ft
Out[88]:
In [ ]: