In [2]:
from sklearn import svm
import pandas as pd
import sys
import pickle
import os
import numpy as np
from sklearn.preprocessing import Imputer
from sklearn import preprocessing
sys.path.append('../../')
import disaggregator.utils as utils
import matplotlib.pyplot as plt
import itertools
from sklearn.decomposition import PCA
import fold
from disaggregator import appliance
from disaggregator import evaluation_metrics as evm
import disaggregator as da
from scipy.interpolate import interp1d
In [3]:
reload(utils)
reload(appliance)
reload(fold)
reload(evm)
Out[3]:
In [5]:
use_traces = pickle.load(open(os.path.join('../../','data/use_validated_05_2014.p'),'rb'))
In [6]:
ev_traces = pickle.load(open(os.path.join('../../','data/car1_validated_05_2014.p'),'rb'))
In [36]:
#test = [utils.split_trace_into_rate(x,'D') for x in ev_traces]
In [7]:
len(use_traces)
Out[7]:
In [8]:
len(ev_traces)
Out[8]:
In [9]:
car_ids = [car.metadata['dataid'] for car in ev_traces]
In [13]:
all_ids = [use.metadata['dataid'] for use in use_traces]
In [10]:
# split the use traces into homes with and without an EV
X_cars = []
X_without_cars = []
for x in use_traces:
    if x.metadata['dataid'] in car_ids:
        X_cars.append(x)
    else:
        X_without_cars.append(x)
In [11]:
# take the same number of non-car homes as car homes to balance the classes
equal_class_size = X_without_cars[:47]
In [65]:
# resample to 1-minute intervals (drops the None entries)
class_cars = [x.resample('1T') for x in X_cars]
class_no_cars = [x.resample('1T') for x in equal_class_size]
In [66]:
#split into days
class_cars = [x.split_by('D') for x in class_cars]
In [67]:
class_no_cars = [x.split_by('D') for x in class_no_cars]
In [68]:
# keep at most the first 30 days from each home
class_cars = [car[:30] for car in class_cars]
class_no_cars = [house[:30] for house in class_no_cars]
In [69]:
# find (home, day) pairs in the car class that are missing minutes
indices = []
for hidx, home in enumerate(class_cars):
    for didx, day in enumerate(home):
        if len(day.series) != 1440:
            indices.append([hidx, didx])
In [125]:
# reindex a short day onto a full 1440-minute range and interpolate the gaps
def fix_one(trace_list, inds):
    home_index = inds[0]
    day_index = inds[1]
    trace = trace_list[home_index][day_index]
    day = trace.series.index[0].day
    month = trace.series.index[0].month
    year = trace.series.index[0].year
    date = '{}/{}/{}'.format(month, day, year)
    print date
    rng = pd.date_range(date, periods=1440, freq='T')
    temp = trace.series.reindex(rng)
    temp = temp.astype(float)
    temp = temp.interpolate()
    temp_app = appliance.ApplianceTrace(temp, trace.metadata)
    return temp_app
In [71]:
for i in indices:
    class_cars[i[0]][i[1]] = fix_one(class_cars, i)
In [29]:
indices_2 = []
for hidx, home in enumerate(class_no_cars):
    for didx, day in enumerate(home):
        if len(day.series) != 1440:
            indices_2.append([hidx, didx])
In [31]:
for i in indices_2:
    class_no_cars[i[0]][i[1]] = fix_one(class_no_cars, i)
In [74]:
# sanity check: every day in the car class should now have 1440 samples
for home in class_cars:
    for day in home:
        if len(day.series) != 1440:
            print 'oops'
In [79]:
# keep only the homes that have a full 30 days
new_cars = []
for i in class_cars:
    if len(i) == 30:
        new_cars.append(i)
In [85]:
# drop the last four no-car homes (keeps the class sizes matched)
new_no_cars = class_no_cars[:-4]
In [87]:
# flatten the per-home lists into single lists of day traces
chain_class_cars = list(itertools.chain(*new_cars))
chain_class_no_cars = list(itertools.chain(*new_no_cars))
In [104]:
for i in chain_class_cars:
    if len(i.series) != 1440:
        print 'help'
In [90]:
#set up scaler
X_car_train = [x.series for x in chain_class_cars]
scaler = preprocessing.StandardScaler().fit(X_car_train)
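Each day trace now has exactly 1440 samples, so the list of series stacks into a two-dimensional (n_days x 1440) matrix, which is what StandardScaler expects (rows are samples, columns are minutes of the day). A quick sanity check using the already-imported numpy (the name X_car_matrix is just for illustration):

# each row is one day trace; each of the 1440 columns is one minute of that day
X_car_matrix = np.asarray([x.series.values for x in chain_class_cars])
print X_car_matrix.shape  # expect (number of car-class days, 1440)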
In [113]:
reload(fold)
a,b,c,d= fold.folds(chain_class_cars,chain_class_no_cars,5)
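The fold module is project-local and its implementation isn't shown here. Based on how a, b, c, d are used below, they appear to be per-fold training traces, training labels in {+1, -1}, test traces, and test labels. A minimal sketch of such a splitter, assuming cars are labeled +1 and non-cars -1 (the name folds_sketch and the interleaved split are illustrative assumptions, not the actual fold.folds implementation):

import random

def folds_sketch(pos_traces, neg_traces, k, seed=0):
    # label the two classes, shuffle, then hold out every k-th example per fold
    labeled = [(t, 1) for t in pos_traces] + [(t, -1) for t in neg_traces]
    random.Random(seed).shuffle(labeled)
    train_X, train_y, test_X, test_y = [], [], [], []
    for i in range(k):
        test_chunk = labeled[i::k]
        train_chunk = [ex for j, ex in enumerate(labeled) if j % k != i]
        train_X.append([t for t, _ in train_chunk])
        train_y.append([y for _, y in train_chunk])
        test_X.append([t for t, _ in test_chunk])
        test_y.append([y for _, y in test_chunk])
    return train_X, train_y, test_X, test_y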
In [163]:
# count the -1 and +1 labels in the first training fold
minu = 0
plus = 0
for i in b[0]:
    if i == -1:
        minu = minu + 1
    else:
        plus = plus + 1
In [121]:
# Scale all the folds up front (it would be cleaner to do this inside the
# cross-validation loop, but this works for now). Returns the scaled folds.
def fix(x_list):
    list_return = []
    #imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
    for train_x in x_list:
        X_train = [x.series for x in train_x]
        #imp.fit(X_train)
        #new_X = imp.transform(X_train)
        new_X = scaler.transform(X_train)
        list_return.append(new_X)
    return list_return
In [127]:
#X = list(itertools.chain(*X_air))+list(itertools.chain(*X_ev))
In [144]:
# scale the training folds
fixed_train = fix(a)
#fixed_test = fix(c)
In [148]:
fixed_test = fix(c)
In [203]:
clf = svm.SVC(gamma = .1)
In [204]:
clf.fit(fixed_train[0],b[0])
Out[204]:
In [205]:
predictions = clf.predict(fixed_test[0])
In [206]:
stats = evm.get_positive_negative_stats_neg(d[0], predictions)
In [207]:
print(evm.get_table_of_confusion(stats))
In [208]:
print(evm.get_accuracy(stats))
print(evm.get_f1_score(stats))
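The evaluation helpers live in the local disaggregator.evaluation_metrics module. Assuming stats reduces to true/false positive and negative counts, the reported accuracy and F1 correspond to the standard definitions; a small self-contained sketch of those formulas (accuracy and f1_score here are illustrative, not the evm API):

# standard accuracy and F1 from confusion-matrix counts
def accuracy(tp, fp, tn, fn):
    return float(tp + tn) / (tp + fp + tn + fn)

def f1_score(tp, fp, fn):
    precision = float(tp) / (tp + fp)
    recall = float(tp) / (tp + fn)
    return 2 * precision * recall / (precision + recall)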
In [151]:
# cross-validate: train and evaluate an SVM with the given gamma on each of the 5 folds
def test_with_diff_params(g):
    clf = svm.SVC(gamma = g)
    errors = []
    stats_all = []
    for i in range(5):
        clf.fit(fixed_train[i], b[i])
        predictions = clf.predict(fixed_test[i])
        stats = evm.get_positive_negative_stats_neg(d[i], predictions)
        errors.append(1 - float(evm.get_accuracy(stats)))
        stats_all.append(stats)
    return errors, stats_all
In [148]:
st = {}
err = {}
In [178]:
# sweep gamma from 1/14000 to 10/14000
for gam in range(1, 11, 1):
    errs, st_all = test_with_diff_params(float(gam)/14000)
    st[gam] = st_all
    err[gam] = errs
In [177]:
# mean 5-fold error for each gamma setting
for e in err.values():
    print np.array(e).mean()
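To compare the gamma settings at a glance, the mean cross-validation error can be plotted against gamma using the already-imported matplotlib (recall that the dict keys map to gamma = key/14000):

# plot mean 5-fold CV error against the gamma values swept above
gammas = sorted(err.keys())
mean_errors = [np.array(err[g]).mean() for g in gammas]
plt.plot([float(g)/14000 for g in gammas], mean_errors, marker='o')
plt.xlabel('gamma')
plt.ylabel('mean 5-fold CV error')
plt.show()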