In [31]:
from sklearn import svm
import pandas as pd
import sys
sys.path.append('../../')
import pickle
import os
import numpy as np
from sklearn.preprocessing import Imputer
from sklearn import preprocessing
from disaggregator import utils as utils
import matplotlib.pyplot as plt
import itertools
from sklearn.decomposition import PCA
import math
from disaggregator import appliance as app
from disaggregator import evaluation_metrics as evm
%matplotlib inline

In [42]:
# Re-import the local helper modules so that edits made to their source files
# take effect in this kernel without restarting it.
reload(utils)
reload(evm)


Out[42]:
<module 'disaggregator.evaluation_metrics' from '../../disaggregator/evaluation_metrics.py'>

In [15]:
# Load the pickled air traces. The file is opened in a `with` block so the
# handle is closed as soon as unpickling finishes.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join('../../', 'data/air_validated_01_2014.p'), 'rb') as air_file:
    air_traces = pickle.load(air_file)

In [16]:
# Load the pickled EV traces. The file is opened in a `with` block so the
# handle is closed as soon as unpickling finishes.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join('../../', 'data/ev_validated_01_2014.p'), 'rb') as ev_file:
    ev_traces = pickle.load(ev_file)

In [22]:
# Take the metadata of the first EV trace; it is reused below when the
# ApplianceInstance objects are built. `temp` refers to the trace's own
# metadata object (no copy is made), so mutating it also changes the trace.
temp = ev_traces[0].metadata
# (disabled) remove the 'dataid' entry from the metadata
#temp.pop('dataid')


Out[22]:
{'schema': 'shared', 'source': 'PecanStreet', 'table': u'validated_01_2014'}

In [28]:
# Inspect the first entry of the first EV trace's series index.
ev_traces[0].series.index[0]


Out[28]:
Timestamp('2014-01-01 07:46:00-0600', tz='psycopg2.tz.FixedOffsetTimezone(offset=-360, name=None)')

In [23]:
# Build an ApplianceInstance from all EV traces and the metadata taken above.
instance = app.ApplianceInstance(ev_traces,temp)

In [30]:
# Build a second ApplianceInstance from the air traces. It is given the same
# metadata object (`temp`, which came from an EV trace).
# NOTE(review): confirm that reusing the EV metadata here is intended.
instance_p = app.ApplianceInstance(air_traces, temp)

In [43]:
# Evaluate the metric on the two instances built above.
# NOTE(review): the recorded output of this cell is nan; confirm the inputs
# and the metric implementation.
evm.fraction_energy_assigned_correctly(instance, instance_p)


Out[43]:
nan

In [26]:
# Scratch check: sum a small 2x3 integer array and divide the total by 34.
a1 = np.array([12, 3, 4])
a2 = np.array([12, 3, 4])
a3 = np.vstack((a1, a2))  # two identical rows stacked into a (2, 3) array
np.divide(a3.sum(), 34)


Out[26]:
1

In [6]:
# Drop every trace whose series consists solely of None values.
# NOTE(review): the saved output of this cell is a TypeError
# ("'type' object is not iterable"); its cause is not visible in this notebook.
def _has_any_value(trace):
    """Return False when every value in trace.series is None, True otherwise."""
    return not all(value is None for value in trace.series)

X_ev = [trace for trace in ev_traces if _has_any_value(trace)]
X_air = [trace for trace in air_traces if _has_any_value(trace)]


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-17b8216f850f> in <module>()
      1 #drop nones from traces
----> 2 X_ev= [x for x in ev_traces if not (all(y is None for y in x.series))]
      3 X_air = [x for x in air_traces if not (all(y is None for y in x.series))]

TypeError: 'type' object is not iterable

In [6]:
# Split each EV trace at rate 'D' (presumably one piece per day; confirm in utils).
# X_ev is overwritten: each element becomes the split result for one trace.
X_ev = [utils.split_trace_into_rate(trace, 'D') for trace in X_ev]

In [7]:
# Split each air trace at rate 'D', in the same way as the EV traces.
# X_air is overwritten: each element becomes the split result for one trace.
X_air = [utils.split_trace_into_rate(trace, 'D') for trace in X_air]

In [8]:
# One colour per plotted point: 'g' for air, 'r' for EV.
# The points are the flattened daily traces, air first and EV second, so the
# colours must be counted per daily trace (not per original trace) and listed
# in that same order; otherwise they do not line up with the scatter points.
# Assumes each split result is a sized sequence (len() fails loudly if not).
n_air_points = sum(len(daily_traces) for daily_traces in X_air)
n_ev_points = sum(len(daily_traces) for daily_traces in X_ev)

colors = ['g'] * n_air_points + ['r'] * n_ev_points

In [9]:
# Flatten the per-trace split results into one list of pieces:
# all air pieces first, followed by all EV pieces.
X = list(itertools.chain.from_iterable(itertools.chain(X_air, X_ev)))

In [10]:
# Keep only the underlying series of each piece.
# (From here on X is a list of series rather than a list of trace objects.)
X = [trace.series for trace in X]

In [11]:
# Replace missing readings (None) with NaN so that they are numeric
# "missing" markers. The previous version used `==` (a comparison) where an
# assignment was intended, so no value was ever replaced.
for vec in X:
    for i in range(len(vec)):
        # `is None` is the identity test; `== None` can broadcast on array types.
        if vec[i] is None:
            vec[i] = np.nan

In [12]:
# Mean imputer: missing_values='NaN' targets np.nan entries and, with axis=0,
# each one is filled with the mean of its column (per scikit-learn's Imputer docs).
imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
# Learn the per-column means from X.
imp.fit(X)


Out[12]:
Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)

In [13]:
# Fill the missing entries using the fitted imputer; X is overwritten with the result.
X= imp.transform(X)

In [14]:
# Keep two principal components so that each vector can be drawn as a 2-D point.
pca = PCA(n_components=2)

In [15]:
# Fit the 2-component PCA on the imputed data.
# NOTE: the next cell calls pca.fit_transform(X), which fits the model again.
pca.fit(X)


Out[15]:
PCA(copy=True, n_components=2, whiten=False)

In [16]:
# Fit the PCA on X and project X onto its two components in a single call.
transformed_X = pca.fit_transform(X)

In [20]:
# Separate the projected points into coordinate lists for plotting:
# first component -> x, second component -> y.
x_points = list(transformed_X[:, 0])
y_points = list(transformed_X[:, 1])

In [22]:
# Scatter the projected points, coloured by the entries of `colors`.
# Uses the explicit figure/axes interface and labels the axes so that the
# figure can be read on its own.
fig, ax = plt.subplots()
ax.scatter(x_points, y_points, c=colors)
ax.set_xlabel('Principal component 1')
ax.set_ylabel('Principal component 2')
ax.set_title('Traces projected onto the first two principal components')
plt.show()



In [ ]: