In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  # pyplot is the plotting API; the bare `matplotlib` package has no plot functions

# Location of the training split of the occupancy dataset.
# NOTE: absolute, machine-specific path -- adjust it for your environment.
DATA_PATH = 'C:\\Users\\Siddy\\Desktop\\occupancy_data\\datatraining.txt'

# The file is comma-separated, so read_csv (default sep=',') is the direct
# equivalent of read_table(..., sep=',').
df = pd.read_csv(DATA_PATH)
df.dtypes
Out[19]:
In [20]:
# Preview the first two rows of the loaded frame.
df.head(2)
Out[20]:
In [21]:
# Convert the 'date' column to pandas datetimes so date arithmetic can be used on it,
# then display the dtypes to confirm the conversion.
df['date'] = pd.to_datetime(df['date'])
df.dtypes
Out[21]:
In [44]:
# Use the 'date' column as the row index.
# Re-assigning instead of inplace=True, and guarding on the column's presence,
# keeps this cell safe to re-run: once 'date' is the index it is no longer a
# column, and an unguarded set_index('date') would raise KeyError.
if 'date' in df.columns:
    df = df.set_index('date')
In [45]:
# Pre-declare every frame name as an empty placeholder:
#   dfN     -- the N-th chunk of the data
#   dfN_new -- the cumulative frame built from chunks 1..N
# Each name gets its own independent, empty DataFrame.
df1 = pd.DataFrame()
df2, df2_new = pd.DataFrame(), pd.DataFrame()
df3, df3_new = pd.DataFrame(), pd.DataFrame()
df4, df4_new = pd.DataFrame(), pd.DataFrame()
df5, df5_new = pd.DataFrame(), pd.DataFrame()
df6, df6_new = pd.DataFrame(), pd.DataFrame()
df7, df7_new = pd.DataFrame(), pd.DataFrame()
df8, df8_new = pd.DataFrame(), pd.DataFrame()
df9, df9_new = pd.DataFrame(), pd.DataFrame()
df10, df10_new = pd.DataFrame(), pd.DataFrame()
In [46]:
# Split the frame into ten consecutive positional chunks (df1 .. df10).
#
# Slices are half-open ([lo, hi)), so each chunk must START at the previous
# chunk's END. The earlier version started each chunk one row later
# (e.g. [:814] followed by [815:1628]), which silently dropped the rows at
# positions 814, 1628, 2443, ... from every chunk.
#
# .iloc makes the positional intent explicit; a stop past the end of the frame
# is clipped by pandas rather than raising.
CHUNK_BOUNDS = [0, 814, 1628, 2443, 3258, 4073, 4888, 5703, 6518, 7333, 8148]

df1, df2, df3, df4, df5, df6, df7, df8, df9, df10 = (
    df.iloc[lo:hi] for lo, hi in zip(CHUNK_BOUNDS[:-1], CHUNK_BOUNDS[1:])
)
In [47]:
from sklearn.linear_model import LogisticRegression

# Predictor columns; 'Occupancy' is the label being predicted.
feature_cols = ['Temperature', 'Humidity', 'Light', 'CO2']

# Initial training set: the first chunk only.
df1_train_x = df1.loc[:, feature_cols]
df1_train_y = df1['Occupancy']

# Fit a logistic-regression classifier with default settings on the first chunk.
logreg = LogisticRegression()
logreg.fit(X=df1_train_x, y=df1_train_y)
Out[47]:
In [90]:
# For every later chunk, separate the predictor columns (dfN_x) from the
# 'Occupancy' label (dfN_y) so each chunk can serve as a held-out test set.
df2_x, df2_y = df2.loc[:, feature_cols], df2['Occupancy']
df3_x, df3_y = df3.loc[:, feature_cols], df3['Occupancy']
df4_x, df4_y = df4.loc[:, feature_cols], df4['Occupancy']
df5_x, df5_y = df5.loc[:, feature_cols], df5['Occupancy']
df6_x, df6_y = df6.loc[:, feature_cols], df6['Occupancy']
df7_x, df7_y = df7.loc[:, feature_cols], df7['Occupancy']
df8_x, df8_y = df8.loc[:, feature_cols], df8['Occupancy']
df9_x, df9_y = df9.loc[:, feature_cols], df9['Occupancy']
df10_x, df10_y = df10.loc[:, feature_cols], df10['Occupancy']
In [91]:
# Evaluate the model trained on df1 against the next chunk (df2).
y_pred_class_df2 = logreg.predict(X=df2_x)
In [92]:
from sklearn import metrics
from sklearn.metrics import accuracy_score

# Accuracy of the df2 predictions against the true df2 labels.
df2_score = accuracy_score(y_true=df2_y, y_pred=y_pred_class_df2)
In [93]:
# Start the results table: one 'Accuracy' column, one row per evaluated chunk.
results = pd.DataFrame([df2_score], columns=['Accuracy'])
results.head(n=5)
Out[93]:
In [94]:
# Stack df1 and df2 row-wise; df2_new is the cumulative training set (roughly the first 20% of the data).
df2_new = pd.concat([df1, df2], axis='index')
In [95]:
# Sanity check: display (rows, columns) of the combined df1 + df2 frame.
df2_new.shape #let's check the shape
Out[95]:
In [96]:
# Predictors and label for the cumulative df1 + df2 training set.
df2_new_x, df2_new_y = df2_new.loc[:, feature_cols], df2_new['Occupancy']
In [97]:
# Refit the same estimator on the cumulative training set built from df1 and df2.
logreg.fit(X=df2_new_x, y=df2_new_y)
Out[97]:
In [98]:
# Evaluate on df3, the third chunk, which the current model has not seen.
y_pred_class_df3 = logreg.predict(X=df3_x)
In [99]:
# Accuracy of the df3 predictions against the true df3 labels.
df3_score = metrics.accuracy_score(y_true=df3_y, y_pred=y_pred_class_df3)
In [100]:
# Store the df3 accuracy under row label 1 of the results table, then display it.
results.loc[1] = df3_score
results.head(n=5)
Out[100]:
In [101]:
# Append df3 row-wise to the previous cumulative set; df3_new covers df1-df3 (roughly 30% of the data).
df3_new = pd.concat([df2_new, df3], axis='index')
In [102]:
# Sanity check: display (rows, columns) of the cumulative frame after adding df3.
df3_new.shape
Out[102]:
In [103]:
# Predictors and label for the cumulative training set through df3.
df3_new_x, df3_new_y = df3_new.loc[:, feature_cols], df3_new['Occupancy']
In [104]:
# Refit the same estimator on the cumulative training set through df3.
logreg.fit(X=df3_new_x, y=df3_new_y)
Out[104]:
In [105]:
# Evaluate on df4, the fourth chunk, which the current model has not seen.
y_pred_class_df4 = logreg.predict(X=df4_x)
In [106]:
# Accuracy of the df4 predictions against the true df4 labels.
df4_score = metrics.accuracy_score(y_true=df4_y, y_pred=y_pred_class_df4)
In [107]:
# Store the df4 accuracy under row label 2 of the results table, then display it.
results.loc[2] = df4_score
results.head(n=5)
Out[107]:
In [108]:
# Append df4 row-wise to the previous cumulative set; df4_new covers df1-df4 (roughly 40% of the data).
df4_new = pd.concat([df3_new, df4], axis='index')
In [109]:
# Sanity check: display (rows, columns) of the cumulative frame after adding df4.
df4_new.shape
Out[109]:
In [110]:
# Predictors and label for the cumulative training set through df4.
df4_new_x, df4_new_y = df4_new.loc[:, feature_cols], df4_new['Occupancy']
In [111]:
# Refit the same estimator on the cumulative training set through df4.
logreg.fit(X=df4_new_x, y=df4_new_y)
Out[111]:
In [112]:
# Evaluate on df5, the fifth chunk, which the current model has not seen.
y_pred_class_df5 = logreg.predict(X=df5_x)
In [113]:
# Accuracy of the df5 predictions against the true df5 labels.
df5_score = metrics.accuracy_score(y_true=df5_y, y_pred=y_pred_class_df5)
In [114]:
# Store the df5 accuracy under row label 3 of the results table, then display it.
results.loc[3] = df5_score
results.head(n=5)
Out[114]:
In [115]:
# Append df5 row-wise to the previous cumulative set; df5_new covers df1-df5 (roughly 50% of the data).
df5_new = pd.concat([df4_new, df5], axis='index')
In [116]:
# Sanity check: display (rows, columns) of the cumulative frame after adding df5.
df5_new.shape
Out[116]:
In [117]:
# Predictors and label for the cumulative training set through df5.
df5_new_x, df5_new_y = df5_new.loc[:, feature_cols], df5_new['Occupancy']
In [118]:
# Refit the same estimator on the cumulative training set through df5.
logreg.fit(X=df5_new_x, y=df5_new_y)
Out[118]:
In [119]:
# Evaluate on df6, the sixth chunk, which the current model has not seen.
y_pred_class_df6 = logreg.predict(X=df6_x)
In [120]:
# Accuracy of the df6 predictions against the true df6 labels.
df6_score = metrics.accuracy_score(y_true=df6_y, y_pred=y_pred_class_df6)
In [121]:
# Store the df6 accuracy under row label 4 of the results table, then display it.
results.loc[4] = df6_score
results.head(n=5)
Out[121]:
In [122]:
# Append df6 row-wise to the previous cumulative set; df6_new covers df1-df6 (roughly 60% of the data).
df6_new = pd.concat([df5_new, df6], axis='index')
In [123]:
# Sanity check: display (rows, columns) of the cumulative frame after adding df6.
df6_new.shape
Out[123]:
In [124]:
# Predictors and label for the cumulative training set through df6,
# then refit the same estimator on it.
df6_new_x, df6_new_y = df6_new.loc[:, feature_cols], df6_new['Occupancy']
logreg.fit(X=df6_new_x, y=df6_new_y)
Out[124]:
In [126]:
# Evaluate on df7 (the seventh chunk), record its accuracy under row label 5,
# and display the results collected so far.
y_pred_class_df7 = logreg.predict(X=df7_x)
df7_score = metrics.accuracy_score(y_true=df7_y, y_pred=y_pred_class_df7)
results.loc[5] = df7_score
results.head(n=10)
Out[126]:
In [127]:
# Append df7 row-wise to the previous cumulative set; df7_new covers df1-df7
# (roughly 70% of the data). Display its (rows, columns) as a sanity check.
df7_new = pd.concat([df6_new, df7], axis='index')
df7_new.shape
Out[127]:
In [128]:
# Predictors and label for the cumulative training set through df7,
# then refit the same estimator on it.
df7_new_x, df7_new_y = df7_new.loc[:, feature_cols], df7_new['Occupancy']
logreg.fit(X=df7_new_x, y=df7_new_y)
Out[128]:
In [129]:
# Evaluate on df8 (the eighth chunk), record its accuracy under row label 6,
# and display the results collected so far.
y_pred_class_df8 = logreg.predict(X=df8_x)
df8_score = metrics.accuracy_score(y_true=df8_y, y_pred=y_pred_class_df8)
results.loc[6] = df8_score
results.head(n=10)
Out[129]:
In [ ]:
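# ...and so on: repeat the same extend / refit / predict / score steps for the remaining chunks (df9 and df10).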