In [3]:
%matplotlib inline
import pickle
import numpy as np
import sys
import os.path
sys.path.append(os.path.join(os.pardir,os.pardir))
import disaggregator as da
from pylearn2.utils import serial
import theano
import matplotlib.pyplot as plt
In [4]:
# Adapter used for every Pecan Street query in this notebook; set_url is
# called on the class object itself.
psda = da.PecanStreetDatasetAdapter

# Keep real credentials out of the notebook: when the WIKI_ENERGY_DB_URL
# environment variable is set it supplies the full connection URL. Otherwise
# the placeholder URL below is used and USERNAME/PASSWORD must be filled in.
db_url = os.environ.get(
    "WIKI_ENERGY_DB_URL",
    "postgresql://USERNAME:PASSWORD@db.wiki-energy.org:5432/postgres",
)
psda.set_url(db_url)
In [5]:
# Database schema and the monthly tables (January-May 2014) queried below.
schema = 'shared'
tables = [
    u'validated_01_2014',
    u'validated_02_2014',
    u'validated_03_2014',
    u'validated_04_2014',
    u'validated_05_2014',
]

# Disabled query, kept for provenance. It presumably produced the hard-coded
# id list below (ids with real 'air1' and 'furnace1' values in every month).
#
# all_ids = []
# for month in range(5):
#     air1_ids = psda.get_dataids_with_real_values(schema, tables[month], 'air1')
#     furnace1_ids = psda.get_dataids_with_real_values(schema, tables[month], 'furnace1')
#     all_ids.append(air1_ids)
#     all_ids.append(furnace1_ids)
# common_ids = da.utils.get_common_ids(all_ids)

common_ids = [86, 93, 94, 410, 484, 624, 661, 821, 871, 936, 1167, 1334, 1632, 1953, 1994, 2156, 2158, 2171, 2242, 2470, 2575, 2606, 2769, 2814, 2829, 2864, 2953, 2974, 3092, 3192, 3221, 3263, 3367, 3456, 3482, 3504, 3649, 3723, 3736, 3778, 3893, 4135, 4154, 4298, 4526, 4641, 4767, 4874, 4922, 4956, 4957, 4998, 5026, 5109, 5209, 5218, 5395, 5545, 5568, 5677, 5814, 5874, 5938, 5949, 5972, 6139, 6673, 6730, 6836, 6910, 7062, 7319, 7390, 7531, 7536, 7617, 7769, 7788, 7800, 7850, 7863, 7875, 7940, 7951, 8046, 8079, 8142, 8197, 8292, 8342, 8419, 8645, 8669, 8956, 9019, 9036, 9356, 9484, 9578, 9609, 9654, 9701, 9729, 9737, 9771, 9830, 9875, 9915, 9922, 9932, 9934, 9938, 9939, 9982, 9983]
# Parenthesised print gives the same output on Python 2 and is valid on Python 3.
print(common_ids)
In [6]:
# Split the ids in order: first half for training, next quarter for
# validation, and whatever remains for testing.
# Floor division (//) keeps the sizes integral on Python 3 too; plain "/" on
# ints yields floats there, which cannot be used as slice bounds.
n = len(common_ids)
n_train = n // 2
n_valid = n // 4
n_test = n - (n_train + n_valid)
train_ids = common_ids[:n_train]
valid_ids = common_ids[n_train:n_train + n_valid]
test_ids = common_ids[n_train + n_valid:n_train + n_valid + n_test]
# Parenthesised print gives the same output on Python 2 and is valid on Python 3.
print(test_ids)
In [7]:
model_file = "models/ac_disagg_subset_cpu.pkl"
# Pickles are binary data, so the file is opened in 'rb'. Text mode fails on
# Python 3 and can corrupt the stream on platforms that translate newlines.
# NOTE: pickle.load can execute arbitrary code from the file; only load model
# files from a trusted source.
with open(model_file, 'rb') as model_fh:
    model = pickle.load(model_fh)
In [8]:
# A single all-zero input row of ten integer samples.
test_inputs = np.zeros((1, 10), dtype=int)
In [9]:
# Build a Theano batch variable from the model's input space and run the
# model's forward propagation on it.
input_space = model.get_input_space()
X = input_space.make_theano_batch()
Y = model.fprop(X)
# Compile X -> Y into the callable f used by the cells below.
f = theano.function(inputs=[X], outputs=Y, allow_input_downcast=True)
In [10]:
# Append two singleton axes so the 2-D test input becomes 4-D before it is
# handed to the compiled function.
test_batch = test_inputs[..., np.newaxis, np.newaxis]
outputs = f(test_batch)
In [11]:
# Plot the first (and only) row of the model output for the all-zero input.
first_output = outputs[0]
plt.plot(first_output)
Out[11]:
In [12]:
# tables[2:4] selects the validated_03_2014 and validated_04_2014 tables;
# only the first ten test ids are fetched, for the 'use' appliance at a
# '15T' sample rate.
instance_tables = tables[2:4]
instance_ids = test_ids[:10]
instances = psda.generate_instances_for_appliance_by_dataids(
    schema, instance_tables, 'use', instance_ids, sample_rate='15T')
In [13]:
def get_disaggregated_signal(trace, window_length, target_index, predict_fn=None):
    """Estimate a per-sample appliance signal from a trace using the classifier.

    The trace is cut into windows, each window is classified, and the winning
    class index is mapped to a proportion in [0, 1] that scales the trace's
    value at the corresponding sample.

    Parameters
    ----------
    trace : object
        Must provide ``get_windows(window_length, 1)`` and ``series.values``.
        The second ``get_windows`` argument is presumably the step between
        windows -- TODO confirm against the disaggregator library.
    window_length : int
        Number of samples per window. This is now forwarded to ``get_windows``;
        previously a hard-coded 10 was used regardless of this argument.
    target_index : int
        Number of zero-class entries padded before the predictions. After
        them, ``window_length - target_index`` entries are padded.
    predict_fn : callable, optional
        Maps a ``(n_windows, window_length, 1, 1)`` array to per-class scores
        of shape ``(n_windows, n_classes)``. Defaults to the notebook-level
        compiled function ``f``.

    Returns
    -------
    (totals, signal) : tuple of numpy arrays
        ``totals`` is ``trace.series.values`` as floats; ``signal`` is the
        per-sample proportion multiplied by ``totals``.
    """
    if predict_fn is None:
        # Fall back to the compiled Theano function defined at notebook level.
        predict_fn = f

    windows = trace.get_windows(window_length, 1)
    totals = trace.series.values.astype(float)

    # Two trailing singleton axes are added before the windows are classified.
    class_probs = predict_fn(windows[:, :, np.newaxis, np.newaxis])
    n_classes = class_probs.shape[1]
    classes = np.argmax(class_probs, axis=1)

    # Pad with class 0 on both sides. The builtin int replaces the np.int
    # alias, which has been removed from NumPy.
    # NOTE(review): the padded array has len(classes) + window_length entries,
    # so the product below only lines up if get_windows returns
    # len(totals) - window_length windows -- confirm.
    prepend = np.zeros(target_index, dtype=int)
    postpend = np.zeros(window_length - target_index, dtype=int)
    all_classes = np.concatenate((prepend, classes, postpend))

    # Class index k maps to the k-th of n_classes + 1 evenly spaced
    # proportions between 0 and 1.
    prop_array = np.linspace(0, 1, n_classes + 1)
    all_props = prop_array[all_classes]
    return totals, all_props * totals
# One figure per instance: the trace totals in the default colour with the
# estimated signal drawn over them in red.
for instance in instances:
    first_trace = instance.traces[0]
    total_signal, ac_signal = get_disaggregated_signal(first_trace, 10, 6)
    plt.plot(total_signal)
    plt.plot(ac_signal, 'r')
    plt.show()
What went wrong? Let's look at the dataset this was trained on. Does the dataset make sense?
In [14]:
# Pickles are binary data, so the file is opened in 'rb' rather than text mode.
# The handle is deliberately not named `f`: that name holds the compiled
# Theano function that get_disaggregated_signal calls, and rebinding it here
# would break any later re-run of the cells that use it.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('../../data/pylearn2/ac_percentage_subset_test.pkl', 'rb') as dataset_fh:
    test_dataset = pickle.load(dataset_fh)
In [16]:
# Histogram (19 bins) of the arg-max column index of each row of test_dataset.y.
test_label_indices = np.argmax(test_dataset.y, axis=1)
plt.hist(test_label_indices, bins=19)
Out[16]:
In [ ]: