In [3]:
%matplotlib inline
import pickle
import numpy as np
import sys
import os.path
sys.path.append(os.path.join(os.pardir,os.pardir))
import disaggregator as da
from pylearn2.utils import serial
import theano
import matplotlib.pyplot as plt

In [4]:
psda = da.PecanStreetDatasetAdapter
# Read the database credentials from the environment so real secrets never
# have to be typed into (and saved with) the notebook. Set PECAN_DB_USER and
# PECAN_DB_PASSWORD before starting the kernel; when they are unset the
# original placeholder values are used, which keeps the old behaviour.
# NOTE: a password containing URL-reserved characters (e.g. '@', '/') must be
# percent-encoded before it is placed in the URL.
db_user = os.environ.get("PECAN_DB_USER", "USERNAME")
db_password = os.environ.get("PECAN_DB_PASSWORD", "PASSWORD")
db_url = "postgresql://{0}:{1}@db.wiki-energy.org:5432/postgres".format(db_user, db_password)
psda.set_url(db_url)

In [5]:
# Schema and the monthly validated tables (January through May 2014) to query.
schema = 'shared'
tables = [u'validated_01_2014',
          u'validated_02_2014',
          u'validated_03_2014',
          u'validated_04_2014',
          u'validated_05_2014',]

# Provenance of common_ids: the list below was produced once with the code
# shown here and then hard-coded so the notebook does not have to hit the
# database on every run. (get_common_ids presumably keeps only the ids that
# appear in every list -- confirm against da.utils.)
#
#   all_ids = []
#   for month in range(5):
#       air1_ids = psda.get_dataids_with_real_values(schema,tables[month],'air1')
#       furnace1_ids = psda.get_dataids_with_real_values(schema,tables[month],'furnace1')
#       all_ids.append(air1_ids)
#       all_ids.append(furnace1_ids)
#   common_ids = da.utils.get_common_ids(all_ids)
common_ids = [86, 93, 94, 410, 484, 624, 661, 821, 871, 936, 1167, 1334, 1632, 1953, 1994, 2156, 2158, 2171, 2242, 2470, 2575, 2606, 2769, 2814, 2829, 2864, 2953, 2974, 3092, 3192, 3221, 3263, 3367, 3456, 3482, 3504, 3649, 3723, 3736, 3778, 3893, 4135, 4154, 4298, 4526, 4641, 4767, 4874, 4922, 4956, 4957, 4998, 5026, 5109, 5209, 5218, 5395, 5545, 5568, 5677, 5814, 5874, 5938, 5949, 5972, 6139, 6673, 6730, 6836, 6910, 7062, 7319, 7390, 7531, 7536, 7617, 7769, 7788, 7800, 7850, 7863, 7875, 7940, 7951, 8046, 8079, 8142, 8197, 8292, 8342, 8419, 8645, 8669, 8956, 9019, 9036, 9356, 9484, 9578, 9609, 9654, 9701, 9729, 9737, 9771, 9830, 9875, 9915, 9922, 9932, 9934, 9938, 9939, 9982, 9983]
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(common_ids)


[86, 93, 94, 410, 484, 624, 661, 821, 871, 936, 1167, 1334, 1632, 1953, 1994, 2156, 2158, 2171, 2242, 2470, 2575, 2606, 2769, 2814, 2829, 2864, 2953, 2974, 3092, 3192, 3221, 3263, 3367, 3456, 3482, 3504, 3649, 3723, 3736, 3778, 3893, 4135, 4154, 4298, 4526, 4641, 4767, 4874, 4922, 4956, 4957, 4998, 5026, 5109, 5209, 5218, 5395, 5545, 5568, 5677, 5814, 5874, 5938, 5949, 5972, 6139, 6673, 6730, 6836, 6910, 7062, 7319, 7390, 7531, 7536, 7617, 7769, 7788, 7800, 7850, 7863, 7875, 7940, 7951, 8046, 8079, 8142, 8197, 8292, 8342, 8419, 8645, 8669, 8956, 9019, 9036, 9356, 9484, 9578, 9609, 9654, 9701, 9729, 9737, 9771, 9830, 9875, 9915, 9922, 9932, 9934, 9938, 9939, 9982, 9983]

In [6]:
# Split the ids, in list order, into roughly 1/2 training, 1/4 validation and
# the remainder for testing. Floor division (//) keeps the sizes integral on
# both Python 2 and Python 3; plain `/` would produce floats on Python 3 and
# make the slices below raise TypeError.
n = len(common_ids)
n_train = n // 2
n_valid = n // 4
n_test = n - (n_train + n_valid)
train_ids = common_ids[:n_train]
valid_ids = common_ids[n_train:n_train + n_valid]
# Everything after the validation slice; this is exactly n_test ids.
test_ids = common_ids[n_train + n_valid:]

print(test_ids)


[8079, 8142, 8197, 8292, 8342, 8419, 8645, 8669, 8956, 9019, 9036, 9356, 9484, 9578, 9609, 9654, 9701, 9729, 9737, 9771, 9830, 9875, 9915, 9922, 9932, 9934, 9938, 9939, 9982, 9983]

In [7]:
model_file = "models/ac_disagg_subset_cpu.pkl"

# Pickles are binary data, so the file must be opened in 'rb' mode: text mode
# fails on Python 3 and can corrupt the stream on Windows. The handle gets its
# own name so it does not share `f` with the compiled prediction function.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(model_file,'rb') as model_fh:
    model = pickle.load(model_fh)

In [8]:
# A single all-zero window of ten integer samples (shape (1, 10)), used as a
# smoke-test input for the model.
test_inputs = np.zeros((1, 10), dtype=int)

In [9]:
# Build a symbolic input batch from the model's input space, run the model's
# forward pass on it, and compile the result into a callable `f` that maps a
# numeric batch to the model's outputs.
input_space = model.get_input_space()
X = input_space.make_theano_batch()
Y = model.fprop(X)
f = theano.function(inputs=[X], outputs=Y, allow_input_downcast=True)

In [10]:
# Append two singleton axes (np.newaxis is None) so the 2-D test batch becomes
# 4-D before it is passed to the compiled function.
outputs = f(test_inputs[:, :, None, None])

In [11]:
# Plot the model's output row for the single all-zero test window.
zero_window_output = outputs[0]
plt.plot(zero_window_output)


Out[11]:
[<matplotlib.lines.Line2D at 0x7fbec4620e90>]

In [12]:
# Query the 'use' column for the first ten test ids from the March and April
# 2014 tables (tables[2:4]), requesting a sample rate of '15T'.
spring_tables = tables[2:4]
first_test_ids = test_ids[:10]
instances = psda.generate_instances_for_appliance_by_dataids(
    schema, spring_tables, 'use', first_test_ids, sample_rate='15T')


select use,localminute from "PecanStreet_SharedData".validated_03_2014 where dataid=8079
select use,localminute from "PecanStreet_SharedData".validated_04_2014 where dataid=8079
select use,localminute from "PecanStreet_SharedData".validated_03_2014 where dataid=8142
select use,localminute from "PecanStreet_SharedData".validated_04_2014 where dataid=8142
select use,localminute from "PecanStreet_SharedData".validated_03_2014 where dataid=8197
select use,localminute from "PecanStreet_SharedData".validated_04_2014 where dataid=8197
select use,localminute from "PecanStreet_SharedData".validated_03_2014 where dataid=8292
select use,localminute from "PecanStreet_SharedData".validated_04_2014 where dataid=8292
select use,localminute from "PecanStreet_SharedData".validated_03_2014 where dataid=8342
select use,localminute from "PecanStreet_SharedData".validated_04_2014 where dataid=8342
select use,localminute from "PecanStreet_SharedData".validated_03_2014 where dataid=8419
select use,localminute from "PecanStreet_SharedData".validated_04_2014 where dataid=8419
select use,localminute from "PecanStreet_SharedData".validated_03_2014 where dataid=8645
select use,localminute from "PecanStreet_SharedData".validated_04_2014 where dataid=8645
select use,localminute from "PecanStreet_SharedData".validated_03_2014 where dataid=8669
select use,localminute from "PecanStreet_SharedData".validated_04_2014 where dataid=8669
select use,localminute from "PecanStreet_SharedData".validated_03_2014 where dataid=8956
select use,localminute from "PecanStreet_SharedData".validated_04_2014 where dataid=8956
select use,localminute from "PecanStreet_SharedData".validated_03_2014 where dataid=9019
select use,localminute from "PecanStreet_SharedData".validated_04_2014 where dataid=9019

In [13]:
def get_disaggregated_signal(trace,window_length,target_index,predict_fn=None):
    """Estimate, sample by sample, how much of a trace the model attributes to the target appliance.

    The trace is cut into sliding windows (step 1), each window is classified,
    and the winning class index is mapped to a proportion in [0, 1] that is
    multiplied by the total signal at the corresponding sample.

    Parameters
    ----------
    trace : object
        Must provide ``get_windows(window_length, step)`` returning a 2-D
        array of windows, and ``series.values`` holding the raw samples.
    window_length : int
        Number of samples per window handed to the classifier.
    target_index : int
        Position inside a window that each prediction is attributed to; this
        many leading samples get no prediction and are treated as class 0.
    predict_fn : callable, optional
        Maps a 4-D batch ``(n_windows, window_length, 1, 1)`` to a 2-D array of
        per-class scores. Defaults to the notebook-level compiled function ``f``.

    Returns
    -------
    totals : numpy.ndarray
        The trace samples as floats.
    disaggregated : numpy.ndarray
        ``totals`` scaled by the predicted proportion at each sample.
    """
    if predict_fn is None:
        # Fall back to the compiled forward pass defined at notebook level.
        predict_fn = f

    # Honour the caller's window_length (it used to be hard-coded to 10).
    windows = trace.get_windows(window_length,1)
    totals = trace.series.values.astype(float)
    class_probs = predict_fn(windows[:,:,np.newaxis,np.newaxis])
    n_classes = class_probs.shape[1]
    classes = np.argmax(class_probs,axis=1)

    # Pad with class 0 (proportion 0.0) where no window is centred. The padding
    # adds window_length entries in total, so this assumes get_windows yields
    # len(series) - window_length windows -- TODO confirm against the trace class.
    # Builtin int replaces np.int, which was removed in NumPy 1.24.
    prepend = np.zeros(target_index,dtype=int)
    postpend = np.zeros(window_length - target_index,dtype=int)
    all_classes = np.concatenate((prepend,classes,postpend))

    # Class k maps to the k-th of n_classes + 1 evenly spaced proportions.
    prop_array = np.linspace(0,1,n_classes + 1)
    all_props = prop_array[all_classes]
    return totals,all_props * totals

# One figure per instance: the total signal of its first trace together with
# the share the model attributes to the target appliance (drawn in red).
# Windows are 10 samples long and each prediction is attributed to index 6.
for instance in instances:
    total_signal,ac_signal = get_disaggregated_signal(instance.traces[0],10,6)
    plt.plot(total_signal,label='total use')
    plt.plot(ac_signal,'r',label='estimated appliance share')
    plt.xlabel('sample index')
    # Without a legend the two curves cannot be told apart when skimming.
    plt.legend()
    plt.show()


What went wrong? Let's look at the dataset this was trained on. Does the dataset make sense?


In [14]:
# Load the pickled test split. The handle deliberately is NOT called `f`:
# that name holds the compiled prediction function used by
# get_disaggregated_signal, and rebinding it here would break any later
# re-run of the cells that call it. Pickles are binary, hence 'rb'.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('../../data/pylearn2/ac_percentage_subset_test.pkl', 'rb') as dataset_fh:
    test_dataset = pickle.load(dataset_fh)

In [16]:
# Histogram (19 bins) of the index of the largest entry in each row of the
# test targets -- presumably the class label if y is one-hot encoded.
target_classes = np.argmax(test_dataset.y, axis=1)
plt.hist(target_classes, bins=19)


Out[16]:
(array([ 39620.,  16848.,  12656.,   9680.,   7913.,   7104.,   6502.,
          6716.,   6160.,   5544.,   5160.,   4970.,   4832.,   4120.,
          3636.,   2812.,   1970.,   1064.,    651.]),
 array([  1.        ,   1.94736842,   2.89473684,   3.84210526,
          4.78947368,   5.73684211,   6.68421053,   7.63157895,
          8.57894737,   9.52631579,  10.47368421,  11.42105263,
         12.36842105,  13.31578947,  14.26315789,  15.21052632,
         16.15789474,  17.10526316,  18.05263158,  19.        ]),
 <a list of 19 Patch objects>)

In [ ]: