Artigo:
Measure Power Voltage Normalize Edge Detection Cluster Analysis Build Appliance Models Tabulate Statistics
URL: https://www.semanticscholar.org/paper/Measure-Power-Voltage-Normalize-Edge-Detection-Deshmukh-Lohan/9156bffdc18946ceb3f4e6f56560b238c2fc931d
Source code: https://github.com/andydesh/nilm
Estratégia proposta: "quebrar" a série em chunks (janelas) de sub-séries, extrair características estatísticas e aplicar classificação supervisionada.
In [15]:
# Standard library
import csv, time, os, sys
# Numerics and plotting
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
plt.rc('text', usetex=False)
#plt.rc('font', family='serif')
# Classifiers evaluated in the benchmarking loop further below
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
import pandas as pd
from scipy import interpolate
# Paths: REDD dataset and the benchmarked CS446 project (its src/ is added
# to sys.path so createInstances / energy helpers can be imported).
REDD_RESOURCES_PATH = 'datasets/REDD'
BENCHMARKING_RESOURCES_PATH = 'benchmarkings/cs446 project-electric-load-identification-using-machine-learning/'
sys.path.append(os.path.join(BENCHMARKING_RESOURCES_PATH, 'src'))
from MLData import createInstances, deviceErrors
from energyCalcs import actDevEnergy,appDevEnergy,energyComp
In [2]:
# Experiment configuration: data locations, instance window and device set.
datapath = os.path.join(REDD_RESOURCES_PATH, 'low_freq/house_3/')

# Weather data (not shared by the original authors - algorithm adapted)
weatherfile = 'Data/weather/20110405hourly_Boston.txt'
weatherfile_test = 'Data/weather/20110405hourly_Boston.txt'
# FIX: `weatherfile` points to a regular file, so the existence check must be
# os.path.isfile — the original used os.path.isdir, which is always False for
# a file path. (When the file is absent the flag stays False and the weather
# attribute is skipped, preserving the adapted behavior described above.)
available_weatherdata = os.path.isfile(weatherfile)

fileprefix = 'channel_'
# Time window (seconds) used to slice the series into instances
timewindow = 90
# Device indices used in training and testing
use_idx = np.array([3, 4, 6, 7, 10, 11, 13, 17, 19])
In [3]:
# ------------------------------------------------------------------
# Build the TRAINING arrays: per-device timestamps and power readings,
# the aggregate power signal, and (optionally) interpolated weather data.
# ------------------------------------------------------------------
train_vals_start = 0
train_vals_end = 120001

# training data arrays (9 = number of devices in use_idx)
device_timer = np.zeros(shape=(train_vals_end - train_vals_start, 9))
device_power = np.zeros(shape=(train_vals_end - train_vals_start, 9))
total_device_power = np.zeros(shape=(train_vals_end - train_vals_start))
weather_timer = np.zeros(shape=(1980))
weather_temp = np.zeros(shape=(1980))
weather_data = np.zeros(shape=(train_vals_end - train_vals_start))

uidx = 0
print("Generating train data (from index {} to {})...".format(train_vals_start, train_vals_end))

# For each device (index), do...
fig = plt.figure(figsize=(20, 10))
for device in range(0, 20):
    # If the device is in our list of interest..
    if device in use_idx:
        # Load energy data; REDD house_3 channel files are offset by 3
        # from the device index.
        channel = device + 3
        filename = 'channel_' + str(channel) + '.dat'
        filepath = datapath + filename
        # x = timestamp / y = energy
        xtemp, ytemp = np.loadtxt(filepath, unpack=True)
        print('Iter: {} / Device: {}'.format(uidx, device))
        device_timer[:, uidx] = xtemp[train_vals_start:train_vals_end]  # - 1302930690
        device_power[:, uidx] = ytemp[train_vals_start:train_vals_end]
        total_device_power += ytemp[train_vals_start:train_vals_end]
        plt.plot(device_timer[:, uidx], device_power[:, uidx])
        uidx = uidx + 1

plt.title('Train Data - Power usage by Appliance')
plt.legend(use_idx)
plt.xlabel('Timestamp')
#ticks = xtemp[list(range(train_vals_start, train_vals_end-1, 10000)) + [train_vals_end]]
#plt.xticks(ticks, rotation='vertical')
plt.xlim(xtemp[train_vals_start], xtemp[train_vals_end])
plt.ylabel('Power')
plt.show()

################################################################
# read the weather data (skipped when no weather file is available)
if available_weatherdata:
    # FIX: the original ended with `wfile.close` (no parentheses), which
    # never closed the file; a context manager guarantees closure.
    with open(weatherfile, 'rt') as wfile:
        rownum = 0
        wreader = csv.reader(wfile, delimiter=',')
        for row in wreader:
            # Columns (per NOAA hourly format): row[1]=date YYYYMMDD,
            # row[2]=time HHMM, row[10]=temperature — TODO confirm schema.
            wdate = row[1]
            wtime = row[2]
            wdatelist = list(wdate)
            wtimelist = list(wtime)
            timedatestr = (''.join(wdatelist[0:4]) + '-' + ''.join(wdatelist[4:6]) + '-'
                           + ''.join(wdatelist[6:8]) + '-' + ''.join(wtimelist[0:2]) + '-'
                           + ''.join(wtimelist[2:4]) + '-' + '00')
            weather_timer[rownum] = int(time.mktime(time.strptime(timedatestr, "%Y-%m-%d-%H-%M-%S")))
            weather_temp[rownum] = int(row[10])
            rownum = rownum + 1
    # Interpolate the hourly weather samples onto the power-measurement
    # timestamps (moved out of the row loop: only the final fit is needed).
    interp_func = interpolate.interp1d(weather_timer, weather_temp)
    weather_data = interp_func(device_timer[:, 0])
In [4]:
# Sample the training-window timestamps every 100 readings (plus the final
# index); in the notebook this bare expression just displays the array.
xtemp[list(range(train_vals_start, train_vals_end-1, 100)) + [train_vals_end]]
Out[4]:
In [5]:
# ------------------------------------------------------------------
# Build the TEST arrays: same procedure as the training cell, applied
# to the following slice of the series (indices 120001..240002).
# ------------------------------------------------------------------
test_vals_start = 120001
test_vals_end = 240002

# test data arrays (9 = number of devices in use_idx)
device_timer_test = np.zeros(shape=(test_vals_end - test_vals_start, 9))
device_power_test = np.zeros(shape=(test_vals_end - test_vals_start, 9))
total_device_power_test = np.zeros(shape=(test_vals_end - test_vals_start))
weather_timer_test = np.zeros(shape=(1980))
weather_temp_test = np.zeros(shape=(1980))
weather_data_test = np.zeros(shape=(test_vals_end - test_vals_start))

uidx = 0
print("Generating test data (from index {} to {})...".format(test_vals_start, test_vals_end))

# For each device (index), do...
fig = plt.figure(figsize=(20, 10))
for device in range(0, 20):
    # If the device is in our list of interest..
    if device in use_idx:
        # Load energy data; channel files are offset by 3 from device index.
        channel = device + 3
        filename = 'channel_' + str(channel) + '.dat'
        filepath = datapath + filename
        # x = timestamp / y = energy
        xtemp, ytemp = np.loadtxt(filepath, unpack=True)
        print('Iter: {} / Device: {}'.format(uidx, device))
        device_timer_test[:, uidx] = xtemp[test_vals_start:test_vals_end]  # - 1302930690
        device_power_test[:, uidx] = ytemp[test_vals_start:test_vals_end]
        total_device_power_test += ytemp[test_vals_start:test_vals_end]
        plt.plot(device_timer_test[:, uidx], device_power_test[:, uidx])
        uidx = uidx + 1

plt.title('Test Data - Power usage by Appliance')
plt.legend(use_idx)
plt.xlabel('Timestamp')
#ticks = xtemp[list(range(test_vals_start, test_vals_end-1, 10000)) + [test_vals_end]]
#plt.xticks(ticks, rotation='vertical')
plt.xlim(xtemp[test_vals_start], xtemp[test_vals_end])
plt.ylabel('Power')
plt.show()

################################################################
# read the weather data (skipped when no weather file is available)
if available_weatherdata:
    # FIX 1: open the *test* weather file; the original opened `weatherfile`
    # although `weatherfile_test` was defined for this cell (both currently
    # hold the same path, so the output is unchanged).
    # FIX 2: the original ended with `wfile.close` (no parentheses), which
    # never closed the file; a context manager guarantees closure.
    with open(weatherfile_test, 'rt') as wfile:
        rownum = 0
        wreader = csv.reader(wfile, delimiter=',')
        for row in wreader:
            # row[1]=date YYYYMMDD, row[2]=time HHMM, row[10]=temperature
            # — TODO confirm schema against the source file.
            wdate = row[1]
            wtime = row[2]
            wdatelist = list(wdate)
            wtimelist = list(wtime)
            timedatestr = (''.join(wdatelist[0:4]) + '-' + ''.join(wdatelist[4:6]) + '-'
                           + ''.join(wdatelist[6:8]) + '-' + ''.join(wtimelist[0:2]) + '-'
                           + ''.join(wtimelist[2:4]) + '-' + '00')
            weather_timer_test[rownum] = int(time.mktime(time.strptime(timedatestr, "%Y-%m-%d-%H-%M-%S")))
            weather_temp_test[rownum] = int(row[10])
            rownum = rownum + 1
    # Interpolate hourly weather onto the measurement timestamps (moved
    # out of the row loop: only the final fit is needed).
    interp_func = interpolate.interp1d(weather_timer_test, weather_temp_test)
    weather_data_test = interp_func(device_timer_test[:, 0])
In [6]:
################################################################
# create the instances and labels from the training data
# (createInstances slices the aggregate series into `timewindow` chunks
# and extracts per-chunk statistical features; see MLData.py)
classifier = 3 # 1 - Naive Bayes, 2 - Regression, 3 - SVM, 4 - Linear Discriminant Analysis, 5 - Random Forest Classifier
# Train / Test
print('Train -> ', end='')
train_instances, train_labels, train_labels_binary, train_power_chunks = createInstances(
total_device_power, device_timer, device_power, weather_data,
classifier,
timewindow)
print('Test -> ', end='')
test_instances,test_labels,test_labels_binary, test_power_chunks = createInstances(
total_device_power_test, device_timer_test, device_power_test, weather_data_test,
classifier,
timewindow)
In [43]:
from IPython.display import display

# Persist every generated array so later experiments can reload them
# without regenerating the instances.
_generated = {
    'train_instances': train_instances,
    'train_labels': train_labels,
    'train_labels_binary': train_labels_binary,
    'train_power_chunks': train_power_chunks,
    'test_instances': test_instances,
    'test_labels': test_labels,
    'test_labels_binary': test_labels_binary,
    'test_power_chunks': test_power_chunks,
}
for _name, _array in _generated.items():
    np.save(os.path.join(BENCHMARKING_RESOURCES_PATH, 'datasets/{}.npy'.format(_name)), _array)

# TRAINING dataframe with the statistics for each chunk (benchmarking
# project under analysis)
print('Benchmarking dataset...')
statistics = ["MEAN_POWER", "STD_POWER", "TIME_OF_DAY", "AMBIENT_TEMPERATURE", "PEAK_POWER", "ENERGY", "DAY_OF_WEEK"]
appliance_columns = ["APLIANCE_{}".format(i) for i in use_idx]
df_statistics = pd.DataFrame(
    np.concatenate([train_instances, train_labels_binary], axis=1),
    columns=statistics + appliance_columns,
)
df_statistics.to_csv(os.path.join(BENCHMARKING_RESOURCES_PATH, 'datasets/df_statistics.csv'))
display(df_statistics.head())

# Dataframe with the household load and appliance labels (used for the thesis)
print('Thesis benchmarking dataset...')
power_columns = ["POWER_{}".format(i) for i in range(train_power_chunks.shape[1])]
df_power_chunks = pd.DataFrame(
    np.concatenate([train_power_chunks, train_labels_binary], axis=1),
    columns=power_columns + appliance_columns,
)
df_power_chunks.to_csv(os.path.join(BENCHMARKING_RESOURCES_PATH, 'datasets/df_power_chunks.csv'))
display(df_power_chunks.head())
In [8]:
# Benchmark six classifiers on the chunk-statistics instances, reporting
# accuracy, per-device accuracy, energy estimates and timings for each.
for clf_idx in range(1, 7):
    # Select the classifier under evaluation. (Removed dead code from the
    # original: extra MultinomialNB(...)/LogisticRegression(...)/SVR(...)
    # objects were constructed and immediately discarded without ever
    # configuring `clf`.)
    if clf_idx == 1:
        cLabel = 'Naive Bayes'
        clf = MultinomialNB()
    elif clf_idx == 2:
        cLabel = 'Logistic Regression'
        clf = LogisticRegression()
    elif clf_idx == 3:
        cLabel = 'SVM'
        clf = SVC()
    elif clf_idx == 4:
        cLabel = 'Linear Discriminant Analysis'
        clf = LDA()
    elif clf_idx == 5:
        cLabel = 'Random Forest Classifier'
        clf = RandomForestClassifier(n_estimators=10)
    elif clf_idx == 6:
        cLabel = 'K-means clustering'
        clf = KMeans(n_clusters=512, init='random')

    # --- training ---
    t0 = time.time()
    clf.fit(train_instances, train_labels)
    t1 = time.time()

    nd = len(use_idx)

    # --- prediction on training and test data ---
    # FIX: the timers now bracket the actual classification calls; in the
    # original, t2 and t3 were both taken after the energy computations,
    # so the reported "Classifying Test Data" time was always ~0 s.
    accuracyTr, dev_acc_train, predicted_labels_binary_train = deviceErrors(
        clf, nd, train_instances, train_labels, train_labels_binary)
    t2 = time.time()
    accuracyTs, dev_acc_test, predicted_labels_binary_test = deviceErrors(
        clf, nd, test_instances, test_labels, test_labels_binary)
    t3 = time.time()

    # --- prediction of device energy consumption ---
    # column 5 of the instance matrix holds the chunk energy feature
    agg_energy_train = train_instances[:, 5]
    actEnergy_train = actDevEnergy(device_power, device_timer, nd)
    appEnergy_train = appDevEnergy(train_labels_binary, agg_energy_train, nd)
    preEnergy_train = appDevEnergy(predicted_labels_binary_train, agg_energy_train, nd)
    acTap_train, acTpre_train, apTde_train = energyComp(actEnergy_train, appEnergy_train, preEnergy_train)

    agg_energy_test = test_instances[:, 5]
    actEnergy_test = actDevEnergy(device_power_test, device_timer_test, nd)
    appEnergy_test = appDevEnergy(test_labels_binary, agg_energy_test, nd)
    preEnergy_test = appDevEnergy(predicted_labels_binary_test, agg_energy_test, nd)
    acTap_test, acTpre_test, apTde_test = energyComp(actEnergy_test, appEnergy_test, preEnergy_test)

    trainTime = t1 - t0
    test1Time = t2 - t1
    test2Time = t3 - t2

    print('================================================================================')
    print('Classifier = ' + cLabel)
    print('Computational Expense for Training Classifier = ' + str(trainTime) + 's')
    print('------------------------- Results for Traning Data -----------------------------')
    print('Percent Accuracy on Training Data = ' + str(accuracyTr) + '%')
    print('Percent Accuracy per device on Training Data = ' + str(dev_acc_train) + '%')
    print('Actual Device Energy on Training Data = ' + str(actEnergy_train))
    print('Approx Device Energy on Training Data = ' + str(appEnergy_train))
    print('Predicted Device Energy on Training Data = ' + str(preEnergy_train))
    print('Computational Expense Classifying Training Data = ' + str(test1Time) + 's')
    print('Device Accuracy Approx. vs Actual = ' + str(acTap_train))
    print('Device Accuracy Pre. vs. Actual = ' + str(acTpre_train))
    print('Device Accuracy Pre. vs. approx. = ' + str(apTde_train))
    print('------------------------- Results for Test Data -----------------------------')
    print('Percent Accuracy on Test Data = ' + str(accuracyTs) + '%')
    print('Percent Accuracy per device on Test Data = ' + str(dev_acc_test) + '%')
    print('Actual Device Energy on Test Data = ' + str(actEnergy_test))
    print('Approx Device Energy on Test Data = ' + str(appEnergy_test))
    print('Predicted Device Energy on Test Data = ' + str(preEnergy_test))
    print('Computational Expense Classifying Test Data = ' + str(test2Time) + 's')
    print('Device Accuracy Approx. vs Actual = ' + str(acTap_test))
    print('Device Accuracy Pre. vs. Actual = ' + str(acTpre_test))
    print('Device Accuracy Pre. vs. approx. = ' + str(apTde_test))
    print('\n\n')
In [9]:
# # compute the energy consumption of each device.
# ################################################################
# #plot 4 of the devices for illustration
# fig = plt.figure(0)
# lendev = len(device_timer[:,0])
# ax1 = plt.subplot(221)
# plt.plot((device_timer[:,0]-device_timer[0,0])/(device_timer[lendev-1,0]-device_timer[0,0]),device_power[:,0])
# ax1.set_title('Electronics')
# plt.ylabel('Device Power (W)')
# ax2 = plt.subplot(222)
# plt.plot((device_timer[:,0]-device_timer[0,0])/(device_timer[lendev-1,0]-device_timer[0,0]),device_power[:,1])
# ax2.set_title('Refrigerator')
# #plt.ylabel('Device Power (W)')
# ax3 = plt.subplot(223)
# plt.plot((device_timer[:,0]-device_timer[0,0])/(device_timer[lendev-1,0]-device_timer[0,0]),device_power[:,3])
# ax3.set_title('Furnace')
# plt.xlabel('Normalized Time')
# plt.ylabel('Device Power (W)')
# ax4 = plt.subplot(224)
# plt.plot((device_timer[:,0]-device_timer[0,0])/(device_timer[lendev-1,0]-device_timer[0,0]),device_power[:,5])
# ax4.set_title('Washer Dryer 2')
# plt.xlabel('Normalized Time')
# #plt.ylabel('Device Power (W)')
# fig = plt.figure(1)
# plt.plot((device_timer[0:288,0]-device_timer[0,0])/(device_timer[288-1,0]-device_timer[0,0]),device_power[0:288,0])
# plt.show()
# plt.ylabel('Mains Power Consumption (W)')
# plt.xlabel('time (s)')
Foi possível reproduzir o experimento conduzido no projeto CS446 Project: Electric Load Identification using Machine Learning
, onde se extrai atributos para cada subset (chunks) de séries temporais de medições de energia residencial de baixa frequência, tais como:
* Average power instance
* Std deviation of power
* Local hour of the day fraction
* Local average temperature during the 5 minute window
* Maximum power reading
* The energy (integral of power) reading
* Day of the week
Todavia, no processo de implementação foram detectadas algumas limitações, destacando-se:
1. O autor não compartilhou os dados meteorológicos utilizados no estudo. Sobre esta questão, a saída foi adaptar o algoritmo para pré-processar os dados de modo que este atributo seja ignorado;
2. Muitas inconsistências na implementação, como a definição do intervalo de medição a partir do cabeçalho da série. Este é um ponto problemático, uma vez que as medições individuais dos aparelhos não estão normalizadas por padrão, e o intervalo pode sofrer variação (como ocorre no conjunto de TESTE, que resultou em um `timestep` de 9 unidades, diferente dos 3 segundos existentes na corrente principal da casa).
Sendo assim, um ponto de evolução é implementar a extração de características a partir do pacote NILMTK
, que permite processar os dados mantendo a integridade das informações. Por ora, foi fixado o timestep
em 3 segundos (sampling_rate) no código do projeto (mais especificamente na linha 14 do arquivo PROJETO_PATH/src/MLData.py).
In [10]:
# timestep = device_timer[2, 1] - device_timer[1, 1]
# print('timestep: {}'.format(timestep))
# numdata = len(total_device_power)
# print('numdata: {}'.format(numdata))
# numdevices = len(device_power[0])
# print('numdevices: {}'.format(numdevices))
# idxstep = int(timewindow/timestep)
# print('idxstep: {}'.format(idxstep))
# numinstances = int(numdata/idxstep)
# print('numinstances: {}'.format(numinstances))
# binarylabels = np.zeros(shape=(numinstances,numdevices),dtype=np.int)
# print('binarylabels: {}'.format(binarylabels.shape))
In [11]:
# Timestep inconsistency: the difference between consecutive test-set
# timestamps (~9 units here) does not match the 3-second sampling of the
# house mains, as discussed in the notes above.
device_timer_test[2, 1] - device_timer_test[1, 1]
Out[11]:
In [14]:
# Inspect the shape of the training instance matrix (rows = chunks,
# columns = extracted statistics).
train_instances.shape
Out[14]:
In [13]:
# Inspect the shape of the test instance matrix (rows = chunks,
# columns = extracted statistics).
test_instances.shape
Out[13]:
In [ ]: