In [2]:
import pandas as pd
import numpy as np
# pyplot is the plotting interface conventionally aliased as `plt`; the bare
# `matplotlib` package does not provide plot(), subplots(), show(), etc.
import matplotlib.pyplot as plt

# Path of the comma-separated input file. Kept in one named constant so it
# only has to be edited in a single place when the notebook is run elsewhere.
DATA_PATH = 'C:\\Users\\Siddy\\Desktop\\occupancy_data\\datatraining.txt'

# read_csv uses ',' as its default separator, so it reads the file exactly as
# read_table(..., sep=',') did.
df = pd.read_csv(DATA_PATH)
df.dtypes
Out[2]:
In [3]:
# Dimensions of the loaded frame as (rows, columns).
df.shape
Out[3]:
In [4]:
# Preview the first 10 rows.
df.head(10)
Out[4]:
In [5]:
# Column labels of the frame.
df.columns
Out[5]:
In [6]:
# Parse the 'date' column into datetime values so that date arithmetic
# (differences, min/max) can be done on it.
df['date'] = pd.to_datetime(df['date'])
df.dtypes
Out[6]:
In [7]:
# Reference timestamp. dayfirst=False spells out the default month/day/year
# reading of the string, i.e. 4 February 2015.
ts = pd.to_datetime('2/4/2015', dayfirst=False)
In [8]:
df.date.max()
Out[8]:
In [9]:
df.date.min()
Out[9]:
In [10]:
# Total time covered by the data, in seconds. Timedelta.seconds is only the
# seconds component left after whole days are removed (0-86399), so it
# under-reports any span longer than a day; total_seconds() gives the full span.
(df.date.max() - df.date.min()).total_seconds()
Out[10]:
In [11]:
# Dimensions of the frame as (rows, columns), checked again before re-indexing.
df.shape
Out[11]:
In [12]:
# Use the timestamp as the row index. The guard keeps the cell re-runnable:
# once 'date' has become the index it is no longer a column, and a second
# set_index('date') would raise KeyError.
if 'date' in df.columns:
    df = df.set_index('date')
In [104]:
# Split the frame into 10 consecutive, order-preserving folds of (nearly)
# equal size. The edges are derived from len(df) so that each fold starts
# exactly where the previous one stopped. Hand-written slices such as
# df[:814] followed by df[815:1628] silently drop the row at every boundary
# (814, 1628, ...), because a slice's stop position is already exclusive.
N_FOLDS = 10
fold_edges = [i * len(df) // N_FOLDS for i in range(N_FOLDS + 1)]
folds = [
    df.iloc[start:stop]  # positional slice, independent of the index labels
    for start, stop in zip(fold_edges[:-1], fold_edges[1:])
]

# Every row must land in exactly one fold.
assert sum(len(fold) for fold in folds) == len(df), "folds must cover every row"

df1, df2, df3, df4, df5, df6, df7, df8, df9, df10 = folds
In [14]:
# Distinct values taken by the target column.
set(df['Occupancy'])
Out[14]:
In [75]:
# Predictor columns fed to the classifiers; 'Occupancy' is the target.
feature_cols = ['Temperature', 'Humidity', 'Light', 'CO2']

# Features (x) and target (y) for each fold. The first fold carries `_train_`
# in its names because it is the initial training set.
df1_train_x = df1[feature_cols]
df1_train_y = df1.Occupancy

df2_x = df2[feature_cols]
df2_y = df2.Occupancy

df3_x = df3[feature_cols]
df3_y = df3.Occupancy

df4_x = df4[feature_cols]
df4_y = df4.Occupancy

df5_x = df5[feature_cols]
df5_y = df5.Occupancy

# This line used to be a second `df6_y` assignment, which left df6_x undefined.
df6_x = df6[feature_cols]
df6_y = df6.Occupancy

df7_x = df7[feature_cols]
df7_y = df7.Occupancy

df8_x = df8[feature_cols]
df8_y = df8.Occupancy

df9_x = df9[feature_cols]
df9_y = df9.Occupancy

df10_x = df10[feature_cols]
df10_y = df10.Occupancy
In [76]:
from sklearn.linear_model import LogisticRegression
logreg=LogisticRegression()
In [77]:
logreg.fit(df1_train_x,df1_train_y)
Out[77]:
In [78]:
y_pred_class_df2=logreg.predict(df2_x) #test on df2 or next 10%
In [79]:
# Now check the accuracy of the classification model, which was trained on df1 and tested on df2.
In [80]:
from sklearn import metrics
from sklearn.metrics import accuracy_score

# Fraction of df2 rows whose predicted class equals the true Occupancy label.
df2_score = accuracy_score(y_true=df2_y, y_pred=y_pred_class_df2)
In [81]:
# Start the table of per-fold accuracies with the df2 score as row 0.
results = pd.DataFrame([df2_score], columns=['Accuracy'])
results.head(n=5)
Out[81]:
In [82]:
# Next, train the classification model on the next 10% (df2), test it on df3, and so on.
In [83]:
logreg.fit(df2_x,df2_y)
Out[83]:
In [84]:
y_pred_class_df3=logreg.predict(df3_x)
df3_score=metrics.accuracy_score(df3_y,y_pred_class_df3)
In [85]:
results.loc[1] = df3_score
results.head()
Out[85]:
In [86]:
logreg.fit(df3_x,df3_y)
Out[86]:
In [87]:
y_pred_class_df4=logreg.predict(df4_x)
df4_score=metrics.accuracy_score(df4_y,y_pred_class_df4)
results.loc[2] = df4_score
results.head()
Out[87]:
In [88]:
logreg.fit(df4_x,df4_y)
Out[88]:
In [89]:
y_pred_class_df5=logreg.predict(df5_x)
df5_score=metrics.accuracy_score(df5_y,y_pred_class_df5)
results.loc[3] = df5_score
results.head()
Out[89]:
In [94]:
# Refit on the fifth fold, but only when its target contains both classes:
# LogisticRegression.fit raises ValueError when y holds a single class.
# NOTE(review): presumably that is the error this cell ran into (a fold with
# no occupied rows) -- confirm against the original traceback.
if df5_y.nunique() > 1:
    logreg.fit(df5_x, df5_y)
else:
    print('Skipping refit on df5: Occupancy has a single class; keeping the model fitted on df4.')
logreg
In [ ]:
# Couldn't find much, so I just moved on to a different classifier: SVM.
In [95]:
from sklearn import svm
svm_classifier = svm.SVC()
svm_classifier.fit(df1_train_x,df1_train_y)
Out[95]:
In [96]:
y_pred_svmclass_df2=svm_classifier.predict(df2_x)
df2_svmscore=metrics.accuracy_score(df2_y,y_pred_svmclass_df2)
results_svm=pd.DataFrame({'Accuracy of SVM':[df2_svmscore]}) # putting results in dataframe
results_svm.head()
Out[96]:
In [97]:
svm_classifier.fit(df2_x,df2_y)
Out[97]:
In [98]:
y_pred_svmclass_df3=svm_classifier.predict(df3_x)
df3_svmscore=metrics.accuracy_score(df3_y,y_pred_svmclass_df3)
results_svm.loc[1] = df3_svmscore
results_svm.head()
Out[98]:
In [99]:
svm_classifier.fit(df3_x,df3_y)
Out[99]:
In [100]:
y_pred_svmclass_df4=svm_classifier.predict(df4_x)
df4_svmscore=metrics.accuracy_score(df4_y,y_pred_svmclass_df4)
results_svm.loc[2] = df4_svmscore
results_svm.head()
Out[100]:
In [101]:
svm_classifier.fit(df4_x,df4_y)
Out[101]:
In [102]:
y_pred_svmclass_df5=svm_classifier.predict(df5_x)
df5_svmscore=metrics.accuracy_score(df5_y,y_pred_svmclass_df5)
results_svm.loc[3] = df5_svmscore
results_svm.head()
Out[102]:
In [103]:
# Refit on the fifth fold, but only when its target contains both classes:
# SVC.fit raises ValueError when y holds a single class.
# NOTE(review): presumably that is the error this cell ran into (a fold with
# no occupied rows) -- confirm against the original traceback.
if df5_y.nunique() > 1:
    svm_classifier.fit(df5_x, df5_y)
else:
    print('Skipping refit on df5: Occupancy has a single class; keeping the SVM fitted on df4.')
svm_classifier
In [ ]:
# Got the same error as with logistic regression.