In [16]:
import sys
import os.path
sys.path.append(os.path.abspath(os.path.join(os.pardir,os.pardir)))
import disaggregator as da
import disaggregator.PecanStreetDatasetAdapter as psda
import pandas as pd
import numpy as np
import copy
# Connection string for the Pecan Street (WikiEnergy) PostgreSQL database.
# Real credentials should be supplied through the WIKI_ENERGY_DB_URL
# environment variable so that they are never saved inside the notebook; the
# placeholder below is only a fallback that documents the expected format.
db_url = os.environ.get(
    "WIKI_ENERGY_DB_URL",
    "postgresql://USERNAME:PASSWORD@db.wiki-energy.org:5432/postgres")
psda.set_url(db_url)

# Schema and monthly tables used below (tables[2] is March, tables[3] is April).
schema = 'shared'
tables = [
    u'validated_01_2014',
    u'validated_02_2014',
    u'validated_03_2014',
    u'validated_04_2014',
    u'validated_05_2014',
]
Get two traces from March and one from April, all sampled every 15 minutes (`15T`), then one more from April sampled every minute (`1T`).
In [2]:
# Data ids available in the March table (tables[2]) ...
march_dataids = psda.get_table_dataids(schema, tables[2])
# ... and in the April table (tables[3]).
april_dataids = psda.get_table_dataids(schema, tables[3])
In [11]:
# Two 'use' traces from March, one per data id, both at 15-minute sampling.
march_trace1, march_trace2 = psda.generate_traces_for_appliance_by_dataids(
    schema, tables[2], 'use', march_dataids[:2], sample_rate='15T')

# The same April data id (april_dataids[1]) fetched at two sampling rates:
# 15 minutes and 1 minute.
april_trace1 = psda.generate_appliance_trace(
    schema, tables[3], 'use', april_dataids[1], sample_rate='15T')
april_trace2 = psda.generate_appliance_trace(
    schema, tables[3], 'use', april_dataids[1], sample_rate='1T')
In [12]:
# Show the time index of every trace before any alignment is attempted.
# Written as a call so the cell runs on both Python 2 and Python 3.
print([trace.series.index for trace in [march_trace1, march_trace2, april_trace1, april_trace2]])
Determine whether or not two traces align.
In [13]:
def traces_aligned(traces):
    """
    Report whether every trace shares exactly the same time index.

    The `series.index` of each trace is collected first, then every index is
    compared against the first one with the index's own `equals` method.
    An empty collection or a single trace is trivially aligned.

    Returns True if all indices match the first, False otherwise.
    """
    timelines = [item.series.index for item in traces]
    if not timelines:
        # Nothing to compare against.
        return True
    head, rest = timelines[0], timelines[1:]
    return all(head.equals(other) for other in rest)
In [14]:
# Drawn from the same table at the same rate: expected to be aligned.
assert traces_aligned([march_trace1, march_trace2])
# Drawn from different tables: expected to be misaligned.
assert not traces_aligned([march_trace1, april_trace1])
# Same table but sampled differently (15T vs 1T): expected to be misaligned.
assert not traces_aligned([april_trace1, april_trace2])
In [15]:
# Re-display the indices to confirm the alignment checks did not modify the traces.
# Written as a call so the cell runs on both Python 2 and Python 3.
print([trace.series.index for trace in [march_trace1, march_trace2, april_trace1, april_trace2]])
Align two misaligned traces.
In [17]:
def align_traces(traces,to=None,how="front"):
    """
    Temporally align traces and return the aligned copies.

    The input traces are deep-copied, so the caller's objects are left
    untouched. The copies are resampled to the lowest sampling rate found,
    shifted so that they start where the reference trace starts, and truncated
    to a common length.

    Parameters
    ----------
    traces : list of trace objects
        Each trace must expose a `series` attribute (a pandas Series with a
        datetime index) and a `resample(freq)` method. This function ignores
        the return value of `resample`, so it relies on that method updating
        the trace in place.
    to : trace object, optional
        Reference trace to align to. It is never copied, resampled or
        modified. If omitted, the first shortest trace (after resampling) is
        used as the reference.
    how : str
        Alignment mode. Only "front" is implemented: every trace is shifted so
        that its first timestamp equals the reference's first timestamp.

    Returns
    -------
    list
        The aligned copies, in the same order as `traces`.

    Raises
    ------
    ValueError
        If none of the traces involved has a known sampling frequency.
    NotImplementedError
        If `how` is anything other than "front".
    """
    # make copies
    traces = copy.deepcopy(traces)
    reference_given = to is not None

    # If already aligned, don't do extra work. When a reference trace is
    # given, the traces must agree with it too, not merely with each other.
    if reference_given:
        if traces_aligned(list(traces) + [to]):
            return traces
    elif traces_aligned(traces):
        return traces

    # Resample to the same frequency. The reference's rate is considered as
    # well, so the traces are never left sampled faster than the trace they
    # are being aligned to.
    freq_sources = list(traces) + ([to] if reference_given else [])
    frequencies = [pd.tseries.frequencies.to_offset(trace.series.index.freq)
                   for trace in freq_sources
                   if trace.series.index.freq is not None]
    if not frequencies:
        raise ValueError("cannot align traces: none of them has a known "
                         "sampling frequency (series.index.freq is unset)")
    # The largest offset is the lowest sampling rate.
    new_freq = max(frequencies)
    for trace in traces:
        trace.resample(new_freq)

    # Determine where to shift to and how much to cut off.
    if not reference_given:
        # min() returns the first minimal element, i.e. the first shortest trace.
        to = min(traces, key=lambda trace: trace.series.size)
    cutoff = min([to.series.size] + [trace.series.size for trace in traces])

    # shift
    if how == 'front':
        # Capture the target start first: `to` may itself be one of the copies.
        front = to.series.index[0]
        for trace in traces:
            trace.series.index = trace.series.index + (front - trace.series.index[0])
    else:
        raise NotImplementedError(
            "align_traces only supports how='front', got %r" % (how,))

    # cut off extra
    for trace in traces:
        trace.series = trace.series[:cutoff]
    return traces
In [19]:
# Compare the original indices with the indices produced by align_traces.
# print is written as a call so the cell runs on both Python 2 and Python 3;
# print("") emits the blank separator line on either version.
print([trace.series.index for trace in [march_trace1, march_trace2, april_trace1, april_trace2]])
print("")
# Two traces from the same table.
print([trace.series.index for trace in align_traces([march_trace1, march_trace2])])
print("")
# Two traces sampled at different rates.
print([trace.series.index for trace in align_traces([april_trace1, april_trace2])])
print("")
# The same two traces, aligned to an explicit reference trace.
print([trace.series.index for trace in align_traces([april_trace1, april_trace2], to=march_trace1)])
print("")
In [12]:
# Exploration aid: list the attributes available on a trace's index.
# Written as a call so the cell runs on both Python 2 and Python 3.
print(dir(march_trace1.series.index))
In [39]:
# Scratch checks on the offset behaviour that align_traces relies on.
# print is written as a call so the cell runs on both Python 2 and Python 3.

# Do '60S' and '1T' produce offsets that compare equal?
print(pd.tseries.frequencies.to_offset('60S') == pd.tseries.frequencies.to_offset('1T'))
# Which of '1S' and '1T' sorts first in descending order?
print(sorted([pd.tseries.frequencies.to_offset('1S'),
              pd.tseries.frequencies.to_offset('1T')], reverse=True)[0].freqstr)
# Frequency string reported for a '60S' offset (shown as the cell output).
pd.tseries.frequencies.to_offset('60S').freqstr
Out[39]:
In [41]:
# Number of samples in the first March trace (shown as the cell output).
march_trace1.series.size
Out[41]:
In [19]:
# Scratch check: add a 60-second offset to the trace's whole index.
# The result is only displayed; march_trace1 itself is not reassigned.
march_trace1.series.index + pd.tseries.frequencies.to_offset('60S')
Out[19]:
In [ ]: