In [3]:
## Section 4.3 Table 2
In [2]:
import data
import pandas as pd
from sklearn.cross_validation import train_test_split
/Users/gandalf/anaconda/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)
In [45]:
mydata = data.alldata.copy()
mydata
Out[45]:
mains
television
fan
fridge
laptop computer
electric heating element
oven
unknown
washing machine
microwave
toaster
sockets
cooker
Kitchen
LivingRoom
StoreRoom
Room1
Room2
2015-07-05 00:00:03
0.0
0.0
0.00
0.000000
0.000000
0.00
0.0
0.0
0.00
0.00
0.0
0.00
0.0
1
0
0
0
0
2015-07-05 00:00:04
0.0
0.0
0.00
0.000000
0.000000
0.00
0.0
0.0
0.00
0.00
0.0
0.00
0.0
0
0
0
0
0
2015-07-05 00:00:05
0.0
0.0
0.00
0.000000
0.000000
0.00
0.0
0.0
0.00
0.00
0.0
0.00
0.0
0
0
0
0
0
2015-07-05 00:00:06
0.0
0.0
0.00
0.000000
0.000000
0.00
0.0
0.0
0.00
0.00
0.0
0.00
0.0
0
0
0
0
0
2015-07-05 00:00:07
0.0
0.0
0.00
0.000000
0.000000
0.00
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
1
0
0
0
2015-07-05 00:00:08
223.0
0.0
0.00
99.210000
0.000000
0.00
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
1
0
1
2015-07-05 00:00:09
223.6
0.0
0.00
99.179070
28.340000
0.00
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
1
0
0
0
2015-07-05 00:00:10
224.2
0.0
0.00
99.148140
28.378095
0.00
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
1
0
0
0
2015-07-05 00:00:11
224.8
0.0
0.00
99.117209
28.416190
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
1
0
0
0
0
2015-07-05 00:00:12
225.4
0.0
0.00
99.086279
28.454286
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:13
226.0
0.0
0.00
99.055349
28.492381
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:14
226.6
0.0
0.00
99.024419
28.530476
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:15
227.2
0.0
0.00
98.993488
28.568571
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:16
227.8
0.0
0.00
98.962558
28.606667
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:17
228.4
0.0
0.00
98.931628
28.644762
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:18
229.0
0.0
0.00
98.900698
28.682857
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:19
226.1
0.0
0.00
98.869767
28.720952
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:20
223.2
0.0
0.00
98.838837
28.759048
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:21
220.3
0.0
0.00
98.807907
28.797143
2.29
0.0
0.0
0.00
0.00
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:22
217.4
0.0
29.65
98.776977
28.835238
2.29
0.0
0.0
0.00
1.24
0.0
7.35
0.0
0
0
1
1
0
2015-07-05 00:00:23
214.5
0.0
29.65
98.746047
28.873333
2.29
0.0
0.0
0.00
1.24
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:24
211.6
0.0
29.65
98.715116
28.911429
2.29
0.0
0.0
0.00
1.24
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:25
208.7
0.0
29.65
98.684186
28.949524
2.29
0.0
0.0
0.00
1.24
0.0
7.35
0.0
1
0
0
0
0
2015-07-05 00:00:26
205.8
0.0
29.65
98.653256
28.987619
2.29
0.0
0.0
0.68
1.24
0.0
7.35
0.0
1
0
0
0
0
2015-07-05 00:00:27
202.9
0.0
29.65
98.622326
29.025714
2.29
0.0
0.0
0.68
1.24
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:28
200.0
0.0
29.65
98.591395
29.063810
2.29
0.0
0.0
0.68
1.24
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:29
199.9
0.0
29.65
98.560465
29.101905
2.29
0.0
0.0
0.68
1.24
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:30
199.8
0.0
29.65
98.529535
29.140000
2.29
0.0
0.0
0.68
1.24
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:31
199.7
0.0
29.65
98.498605
29.178095
2.29
0.0
0.0
0.68
1.24
0.0
7.35
0.0
0
0
0
0
0
2015-07-05 00:00:32
199.6
0.0
29.65
98.467674
29.216190
2.29
0.0
0.0
0.68
1.24
0.0
7.35
0.0
0
0
0
0
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
2015-12-05 21:54:46
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:47
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:48
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:49
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:50
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:51
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:52
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:53
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:54
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:55
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:56
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:57
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:58
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:54:59
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:00
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:01
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:02
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:03
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:04
0.0
0.0
0.00
0.000000
17.400000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:05
0.0
0.0
0.00
0.000000
16.675000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:06
0.0
0.0
0.00
0.000000
15.950000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:07
0.0
0.0
0.00
0.000000
15.225000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:08
0.0
0.0
0.00
0.000000
14.500000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:09
0.0
0.0
0.00
0.000000
13.775000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:10
0.0
0.0
0.00
0.000000
13.050000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:11
0.0
0.0
0.00
0.000000
12.325000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:12
0.0
0.0
0.00
0.000000
11.600000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:13
0.0
0.0
0.00
0.000000
10.875000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:14
0.0
0.0
0.00
0.000000
10.150000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
0
0
0
0
2015-12-05 21:55:15
0.0
0.0
0.00
0.000000
9.425000
2.55
0.0
0.0
0.95
1.24
0.0
1.39
0.0
0
1
0
0
0
13298113 rows × 18 columns
In [7]:
from sklearn import tree
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
mydata1 = mydata.copy()
x3 = mydata1[['television','fan','fridge','laptop computer','electric heating element','oven','unknown','washing machine','microwave','toaster','sockets','cooker']]
#xrange = np.arange(x3.min(),x3.max(),(x3.max()-x3.min())/100).reshape(100,1)
y1 = mydata1['Kitchen'].astype(float)
y2 = mydata1['LivingRoom'].astype(float)
y3 = mydata1['StoreRoom'].astype(float)
y4 = mydata1['Room1'].astype(float)
y5 = mydata1['Room2'].astype(float)
In [1]:
reg1 = tree.DecisionTreeClassifier(max_depth=10)
x4 = mydata[['electric heating element', 'washing machine', 'cooker', 'unknown']]
xtrain, xtest, ytrain, ytest = train_test_split(x4, y1, test_size=0.1)
reg1 = reg1.fit(xtrain,ytrain)
print(reg1.score(xtest,ytest))
print(reg1.feature_importances_)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-1-b24d1d852eb1> in <module>()
----> 1 reg1 = tree.DecisionTreeClassifier(max_depth=10)
2 x4 = mydata[['electric heating element', 'washing machine', 'cooker', 'unknown']]
3 xtrain, xtest, ytrain, ytest = train_test_split(x4, y1, test_size=0.1)
4 reg1 = reg1.fit(xtrain,ytrain)
5 print(reg1.score(xtest,ytest))
NameError: name 'tree' is not defined
In [41]:
reg2 = tree.DecisionTreeClassifier(max_depth=10)
x5 = mydata[['television', 'laptop computer', 'sockets']]
xtrain, xtest, ytrain, ytest = train_test_split(x5, y2, test_size=0.1)
reg2 = reg2.fit(xtrain,ytrain)
print(reg2.score(xtest,ytest))
print(reg2.feature_importances_)
### run this
0.999706725462
[ 0.10016142 0.00437112 0.03027846 0.7878331 0.02034202 0.
0.00092022 0.005225 0.00861924 0. 0.03619724 0.00605218]
In [42]:
reg3 = tree.DecisionTreeClassifier(max_depth=10)
x6 = mydata[['fridge', 'microwave']]
xtrain, xtest, ytrain, ytest = train_test_split(x6, y3, test_size=0.1)
reg3 = reg3.fit(xtrain,ytrain)
print(reg3.score(xtest,ytest))
print(reg3.feature_importances_)
0.999939089134
[ 0.04972268 0.01573463 0.07082126 0.14148523 0.0405032 0.0295637 0.
0.03090138 0.53176731 0.00391251 0.07810079 0.00748729]
In [43]:
reg4 = tree.DecisionTreeClassifier(max_depth=10)
x7 = mydata[['fan']]
xtrain, xtest, ytrain, ytest = train_test_split(x6, y4, test_size=0.1)
reg4 = reg4.fit(xtrain,ytrain)
print(reg4.score(xtest,ytest))
print(reg4.feature_importances_)
0.999974432476
[ 0.01798298 0.68366542 0.0431845 0.09189921 0.07967155 0. 0.
0.01286296 0.01793722 0. 0.05083786 0.0019583 ]
In [44]:
reg5 = tree.DecisionTreeClassifier(max_depth=10)
x8 = mydata[['oven', 'toaster']]
xtrain, xtest, ytrain, ytest = train_test_split(x7, y5, test_size=0.1)
reg5 = reg5.fit(xtrain,ytrain)
print(reg5.score(xtest,ytest))
print(reg5.feature_importances_)
0.999966912616
[ 1.50732437e-01 4.57161633e-07 5.70465267e-02 1.41142566e-01
3.53573243e-02 1.25419748e-01 8.71923519e-03 9.10933806e-03
6.29649092e-02 2.43908905e-01 1.51587825e-01 1.40107281e-02]
In [ ]:
Content source: marioberges/F16-12-752
Similar notebooks: