In [19]:
import pandas as pd
import numpy as np
import matplotlib as plt
df=pd.read_table('C:\\Users\\Siddy\\Desktop\\occupancy_data\\datatraining.txt', sep=',')
df.dtypes


Out[19]:
date              object
Temperature      float64
Humidity         float64
Light            float64
CO2              float64
HumidityRatio    float64
Occupancy          int64
dtype: object

In [20]:
df.head(2)


Out[20]:
date Temperature Humidity Light CO2 HumidityRatio Occupancy
1 2015-02-04 17:51:00 23.18 27.2720 426.0 721.25 0.004793 1
2 2015-02-04 17:51:59 23.15 27.2675 429.5 714.00 0.004783 1

In [21]:
df['date']=pd.to_datetime(df.date) #I have converted to datetime so we can use maths with datetime
df.dtypes


Out[21]:
date             datetime64[ns]
Temperature             float64
Humidity                float64
Light                   float64
CO2                     float64
HumidityRatio           float64
Occupancy                 int64
dtype: object

In [44]:
df.set_index('date',inplace=True)

In [45]:
df1 = pd.DataFrame()
df2 = pd.DataFrame()
df2_new = pd.DataFrame()
df3 = pd.DataFrame()
df3_new = pd.DataFrame()
df4 = pd.DataFrame()
df4_new = pd.DataFrame()
df5 = pd.DataFrame()
df5_new = pd.DataFrame()
df6 = pd.DataFrame()
df6_new = pd.DataFrame()
df7 = pd.DataFrame()
df7_new = pd.DataFrame()
df8 = pd.DataFrame()
df8_new = pd.DataFrame()
df9 = pd.DataFrame()
df9_new = pd.DataFrame()
df10 = pd.DataFrame()
df10_new = pd.DataFrame()

In [46]:
df1 = df[:814]
df2= df[815:1628]
df3= df[1629:2443]
df4= df[2444:3258]
df5= df[3259:4073]
df6= df[4074:4888]
df7= df[4889:5703]
df8= df[5704:6518]
df9= df[6519:7333]
df10= df[7334:8148]

In [47]:
feature_cols=['Temperature', 'Humidity', 'Light', 'CO2']
df1_train_x=df1[feature_cols]
df1_train_y=df1.Occupancy
from sklearn.linear_model import LogisticRegression
logreg=LogisticRegression()
logreg.fit(df1_train_x,df1_train_y)


Out[47]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [90]:
df2_x=df2[feature_cols]
df2_y=df2.Occupancy
df3_x=df3[feature_cols]
df3_y=df3.Occupancy
df4_x=df4[feature_cols]
df4_y=df4.Occupancy
df5_x=df5[feature_cols]
df5_y=df5.Occupancy
df6_x=df6[feature_cols]
df6_y=df6.Occupancy
df7_x=df7[feature_cols]
df7_y=df7.Occupancy
df8_x=df8[feature_cols]
df8_y=df8.Occupancy
df9_x=df9[feature_cols]
df9_y=df9.Occupancy
df10_x=df10[feature_cols]
df10_y=df10.Occupancy

In [91]:
y_pred_class_df2=logreg.predict(df2_x) #test on df2 or next 10%

In [92]:
from sklearn import metrics
from sklearn.metrics import accuracy_score
df2_score=metrics.accuracy_score(df2_y,y_pred_class_df2)

In [93]:
results=pd.DataFrame({'Accuracy':[df2_score]}) # putting results in dataframe 
results.head()


Out[93]:
Accuracy
0 0.906519

In [94]:
df2_new=pd.concat([df1,df2],axis=0)  #concating row waswise so axis =0, df2_new will be our new combined 20%

In [95]:
df2_new.shape #lets check shape


Out[95]:
(1627, 6)

In [96]:
df2_new_x=df2_new[feature_cols]
df2_new_y=df2_new.Occupancy

In [97]:
logreg.fit(df2_new_x,df2_new_y) #train model on combined 20%


Out[97]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [98]:
y_pred_class_df3=logreg.predict(df3_x) #test on df3 which 3rd chunk or batch

In [99]:
df3_score=metrics.accuracy_score(df3_y,y_pred_class_df3)

In [100]:
results.loc[1] = df3_score
results.head()


Out[100]:
Accuracy
0 0.906519
1 0.991400

In [101]:
df3_new=pd.concat([df2_new,df3],axis=0)  #concating row waswise so axis =0, df3_new will be our new combined 30%

In [102]:
df3_new.shape


Out[102]:
(2441, 6)

In [103]:
df3_new_x=df3_new[feature_cols]
df3_new_y=df3_new.Occupancy

In [104]:
logreg.fit(df3_new_x,df3_new_y) #train model on combined 30%


Out[104]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [105]:
y_pred_class_df4=logreg.predict(df4_x) #test on df4 which 4th chunk or batch

In [106]:
df4_score=metrics.accuracy_score(df4_y,y_pred_class_df4)

In [107]:
results.loc[2] = df4_score
results.head()


Out[107]:
Accuracy
0 0.906519
1 0.991400
2 0.987715

In [108]:
df4_new=pd.concat([df3_new,df4],axis=0)  #concating row waswise so axis =0, df4_new will be our new combined 40%

In [109]:
df4_new.shape


Out[109]:
(3255, 6)

In [110]:
df4_new_x=df4_new[feature_cols]
df4_new_y=df4_new.Occupancy

In [111]:
logreg.fit(df4_new_x,df4_new_y) #train model on combined 40%


Out[111]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [112]:
y_pred_class_df5=logreg.predict(df5_x) #test on df5 which 5th chunk or batch

In [113]:
df5_score=metrics.accuracy_score(df5_y,y_pred_class_df5)

In [114]:
results.loc[3] = df5_score
results.head()


Out[114]:
Accuracy
0 0.906519
1 0.991400
2 0.987715
3 0.857494

In [115]:
df5_new=pd.concat([df4_new,df5],axis=0)  #concating row waswise so axis =0, df5_new will be our new combined 50%

In [116]:
df5_new.shape


Out[116]:
(4069, 6)

In [117]:
df5_new_x=df5_new[feature_cols]
df5_new_y=df5_new.Occupancy

In [118]:
logreg.fit(df5_new_x,df5_new_y) #train model on combined 50%


Out[118]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [119]:
y_pred_class_df6=logreg.predict(df6_x) #test on df6 which 6th chunk or batch

In [120]:
df6_score=metrics.accuracy_score(df6_y,y_pred_class_df6)

In [121]:
results.loc[4] = df6_score
results.head()


Out[121]:
Accuracy
0 0.906519
1 0.991400
2 0.987715
3 0.857494
4 1.000000

In [122]:
df6_new=pd.concat([df5_new,df6],axis=0)  #concating row waswise so axis =0, df6_new will be our new combined 60%

In [123]:
df6_new.shape


Out[123]:
(4883, 6)

In [124]:
df6_new_x=df6_new[feature_cols]
df6_new_y=df6_new.Occupancy
logreg.fit(df6_new_x,df6_new_y) #train model on combined 60%


Out[124]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [126]:
y_pred_class_df7=logreg.predict(df7_x) #test on df7 which 7th chunk or batch 
df7_score=metrics.accuracy_score(df7_y,y_pred_class_df7)
results.loc[5] = df7_score
results.head(10)


Out[126]:
Accuracy
0 0.906519
1 0.991400
2 0.987715
3 0.857494
4 1.000000
5 1.000000

In [127]:
df7_new=pd.concat([df6_new,df7],axis=0)  #concating row waswise so axis =0, df7_new will be our new combined 70%
df7_new.shape


Out[127]:
(5697, 6)

In [128]:
df7_new_x=df7_new[feature_cols]
df7_new_y=df7_new.Occupancy
logreg.fit(df7_new_x,df7_new_y) #train model on combined 70%


Out[128]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [129]:
y_pred_class_df8=logreg.predict(df8_x) #test on df8 which 8th chunk or batch 
df8_score=metrics.accuracy_score(df8_y,y_pred_class_df8)
results.loc[6] = df8_score
results.head(10)


Out[129]:
Accuracy
0 0.906519
1 0.991400
2 0.987715
3 0.857494
4 1.000000
5 1.000000
6 1.000000

In [ ]:
#And So on