Artigo:
...URL
: ...Source-code
: ...Estratégia proposta
: converter série-temporal em GRÁFICO DE RECORRÊNCIA, extrair características com DNN (VGG16) e classificação supervisionada.
In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
plt.rc('text', usetex=False)
from matplotlib.image import imsave
import pandas as pd
import pickle as cPickle
import os, sys, cv2
from math import *
from pprint import pprint
from tqdm import tqdm_notebook
from mpl_toolkits.axes_grid1 import make_axes_locatable
from PIL import Image
from glob import glob
from IPython.display import display
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image as keras_image
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from pyts.image import RecurrencePlot
REDD_RESOURCES_PATH = 'datasets/REDD'
BENCHMARKING2_RESOURCES_PATH = 'benchmarkings/Imaging-NILM-time-series/'
HYPOTHESIS_RESOURCES_PATH = 'datasets/hipotese1-recurrenceplot-vggembedding/'
sys.path.append(os.path.join(BENCHMARKING2_RESOURCES_PATH, ''))
from serie2QMlib import *
In [2]:
def standardize(serie):
    """Standardize a series to zero mean and unit variance (z-score).

    Args:
        serie: sequence of numbers.

    Returns:
        list of floats: (x - mean) / std for each value.
    """
    arr = np.asarray(serie, dtype=float)
    # np.std == sqrt(np.var); vectorized instead of a per-element comprehension.
    return ((arr - arr.mean()) / arr.std()).tolist()
# Rescale data into [0,1]
def rescale(serie):
    """Min-max normalize a series into [0, 1].

    Args:
        serie: non-empty sequence of numbers.

    Returns:
        list of floats in [0, 1]. A constant series maps to all zeros
        (the original raised ZeroDivisionError in that case).
    """
    minval = min(serie)
    gap = float(max(serie) - minval)
    if gap == 0.0:
        # Degenerate (constant) series: avoid division by zero.
        return [0.0 for _ in serie]
    return [(each - minval) / gap for each in serie]
#modified from https://stackoverflow.com/questions/33650371/recurrence-plot-in-python
def recurrence_plot(s, eps=None, steps=None, scaling=False):
    """Quantized recurrence matrix of a series.

    Computes the pairwise Euclidean distance matrix of the points in `s`,
    quantizes it into `eps`-wide bins and saturates at `steps`.

    Args:
        s: list, 1-D array (treated as N points in 1-D) or 2-D array of points.
        eps: bin width for distance quantization (default 0.1).
        steps: saturation level; bins above it are clipped (default 10).
        scaling: if True, min-max rescale the points into [0, 1] first.

    Returns:
        (N, N) float array of binned distances in [0, steps].
    """
    if eps is None:
        eps = 0.1
    if steps is None:
        steps = 10
    s = np.asarray(s, dtype=float)
    if s.ndim == 1:
        # One point per row, as sklearn's pairwise_distances expected.
        s = s[:, None]
    if scaling:
        s = np.asarray(rescale(s))
    # Pairwise Euclidean distances, computed with numpy broadcasting
    # (equivalent to sklearn.metrics.pairwise.pairwise_distances default).
    diff = s[:, None, :] - s[None, :, :]
    d = np.sqrt((diff ** 2).sum(axis=-1))
    d = np.floor(d / eps)
    d[d > steps] = steps
    #Z = squareform(d)
    return d
In [3]:
#################################
###Define the parameters here####
#################################
#datafiles = ['dish washer1-1'] # Data file name (TODO: change here)
#trains = [250] # Number of training instances (because we assume training and test data are mixed in one file)
size = [32] # PAA size
# NOTE(review): GAF_type and save_PAA are commented out here but are read by
# serie2RP below; presumably they are supplied by the `serie2QMlib` star
# import — confirm, otherwise serie2RP raises NameError.
#GAF_type = 'GADF' # GAF type: GASF, GADF
#save_PAA = True # Save the GAF with or without dimension reduction by PAA: True, False
#rescale_type = 'Zero' # Rescale the data into [0,1] or [-1,1]: Zero, Minusone
# Output directory for the generated images; created below if missing.
directory = os.path.join(HYPOTHESIS_RESOURCES_PATH, '')
if not os.path.exists(directory):
    os.makedirs(directory)
A fim de normalizar os benchmarkings, serão utilizados os dados das séries do benchmarking 1
para o processo de Extração de Características (conversão serie2image
- benchmarking 2).
In [4]:
def serie2RP(serie, scaling = False, s = 32):
    """
    Customized function to perform Series to Image conversion.

    NOTE(review): despite its name, everything after the recurrence-plot call
    is the GAF (Gramian Angular Field) pipeline inherited from benchmarking 2;
    only `paalistcos` comes from a recurrence plot. The body also reads names
    not defined in this function (`GAF_type`, `save_PAA`, `matmatrix`,
    `fullmatrix`); presumably they come from the `serie2QMlib` star import —
    confirm, otherwise calling this raises NameError.

    Args:
        serie : original input data (time-serie chunk of appliance/main data - REDD - benchmarking 1)
        scaling : if True, min-max rescale the series into [0, 1] first
        s : Size of output paaimage originated from serie [ INFO: PAA = (32, 32) / noPAA = (50, 50) ]

    Returns:
        Tuple (image, paaimage, matmatrix, fullmatrix, finalmatrix).
    """
    rp = None
    rpmatrix = None
    std_data = serie
    if scaling:
        std_data = rescale(std_data)
    # Recurrence matrix of the (optionally rescaled) series.
    paalistcos = recurrence_plot(std_data)#, s, None)
    # paalistcos = rescale(paa(each[1:],s,None))
    # paalistcos = rescaleminus(paa(each[1:],s,None))
    ################raw###################
    datacos = np.array(std_data)
    #print(datacos)
    # Polar encoding sin = sqrt(1 - cos^2); assumes the values lie in [-1, 1],
    # otherwise the sqrt of a negative yields NaNs — TODO confirm inputs.
    datasin = np.sqrt(1 - np.array(std_data) ** 2)
    #print(datasin)
    paalistcos = np.array(paalistcos)
    paalistsin = np.sqrt(1 - paalistcos ** 2)
    datacos = np.matrix(datacos)
    datasin = np.matrix(datasin)
    paalistcos = np.matrix(paalistcos)
    paalistsin = np.matrix(paalistsin)
    if GAF_type == 'GASF':
        # Gramian Angular Summation Field: cos(a+b) via outer products.
        paamatrix = paalistcos.T * paalistcos - paalistsin.T * paalistsin
        matrix = np.array(datacos.T * datacos - datasin.T * datasin)
    elif GAF_type == 'GADF':
        # Gramian Angular Difference Field: sin(a-b) via outer products.
        paamatrix = paalistsin.T * paalistcos - paalistcos.T * paalistsin
        matrix = np.array(datasin.T * datacos - datacos.T * datasin)
    else:
        sys.exit('Unknown GAF type!')
    #label = np.asarray(label)
    image = matrix
    paaimage = np.array(paamatrix)
    # NOTE(review): `matmatrix` and `fullmatrix` are read before any local
    # assignment here — they must exist in an enclosing scope.
    matmatrix = np.asarray(matmatrix)
    fullmatrix = np.asarray(fullmatrix)
    #
    # maximage = maxsample(image, s)
    # maxmatrix = np.asarray(np.asarray([each.flatten() for each in maximage]))
    if save_PAA == False:
        finalmatrix = matmatrix
    else:
        finalmatrix = fullmatrix
    # uncomment below if needed data in pickled form
    # pickledata(finalmatrix, label, train, datafilename)
    #gengramImgs(image, paaimage, label, directory)
    return image, paaimage, matmatrix, fullmatrix, finalmatrix
In [11]:
# Reading power dataset (benchmark 1)
BENCHMARKING1_RESOURCES_PATH = "benchmarkings/cs446 project-electric-load-identification-using-machine-learning/"
size_paa = 32          # image side when PAA reduction is applied
size_without_paa = 30  # image side without PAA
# devices to be used in training and testing (REDD channel indices)
use_idx = np.array([3,4,6,7,10,11,13,17,19])
# NOTE(review): "APLIANCE" is a typo, but these exact labels are reused as
# DataFrame columns and chart titles downstream — keep the spelling.
label_columns_idx = ["APLIANCE_{}".format(i) for i in use_idx]
In [7]:
# Train...
# Load the pre-chunked aggregate power series and their binary labels
# produced by benchmarking 1.
train_power_chunks = np.load( os.path.join(BENCHMARKING1_RESOURCES_PATH, 'datasets/train_power_chunks.npy') )
train_labels_binary = np.load( os.path.join(BENCHMARKING1_RESOURCES_PATH, 'datasets/train_labels_binary.npy') )

# Sanity-check the series -> RP conversion on a single chunk.
# serie = list(np.sin(np.linspace(0,24,1000)))
serie = train_power_chunks[22, :].tolist()
image = RecurrencePlot().fit_transform([serie])[0]

# Visualize the recurrence plot next to the raw series.
fig = plt.figure(figsize=(15,5))
fig.tight_layout() # Or equivalently, "plt.tight_layout()"
# BUGFIX: the original created a full-width subplot (1, 1, 1) and then an
# overlapping half-width one (1, 2, 1); use a proper 1x2 grid instead.
ax = fig.add_subplot(1, 2, 1)
ax.imshow(image, origin="lower", aspect="auto")
ax = fig.add_subplot(1, 2, 2)
plt.plot(list(range(0,len(serie))), serie);
In [11]:
print("Processing train dataset (Series to Images)...")
image_size_px = 32
image_size_inch = round(image_size_px / 71, 2) # Convert px to inch (~71 px per inch)
# NOTE(review): `fig` is only used by the commented-out savefig branch below.
fig = plt.figure(frameon=False)
fig.set_size_inches(image_size_inch, image_size_inch) # 0.45inch = 32px
rp_series_train = []
# Serie -> RP: one recurrence-plot image per training power chunk.
X_rp = RecurrencePlot().fit_transform(train_power_chunks)
#for idx, row in tqdm_notebook(df_power_chunks.iterrows(), total = df_power_chunks.shape[0]):
for idx, power_chunk in tqdm_notebook(enumerate(train_power_chunks), total = train_power_chunks.shape[0]):
    serie = power_chunk
    image = X_rp[idx]
    # Binary ON/OFF labels are encoded into the file name, e.g. "12_010011011_train".
    labels = train_labels_binary[idx, :].astype('str').tolist()
    labels_str = ''.join(labels)
    # Persist image data files (PAA - noPAA)
    np.save(
        os.path.join(
            HYPOTHESIS_RESOURCES_PATH,
            "{}_{}_train.npy".format(idx, labels_str)
        ),
        image
    )
    # x is the array you want to save (colormapped PNG via matplotlib)
    imsave(
        os.path.join(
            HYPOTHESIS_RESOURCES_PATH,
            "{}_{}_train_color.png".format(idx, labels_str)
        ),
        arr=image
    )
    # ax = plt.Axes(fig, [0., 0., 1., 1.])
    # ax.set_axis_off()
    # fig.add_axes(ax)
    # ax.imshow(image, aspect='auto')
    # fig.savefig(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_train_color.png".format(idx, labels_str) ))
    # Un-colormapped copy, resized to 32x32 and saved as "..._train_black.png".
    Image.fromarray(image*255).convert('RGB').resize((image_size_px, image_size_px)).save(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_train_black.png".format(idx, labels_str) ))
    # Row layout: [idx, flattened image pixels..., labels...]
    rp_series_train.append( list([idx]) + list(image.flatten()) + list(labels) )

# Visualizing some results (uses the variables left over from the last loop
# iteration: idx, labels_str, image)...
plt.figure(figsize=(8,6));
ax1 = plt.subplot(121);
plt.title("RP - RGB");
color_rp = plt.imread(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_train_color.png".format(idx, labels_str) ))
plt.imshow(color_rp, origin="lower");
divider = make_axes_locatable(ax1);
cax = divider.append_axes("right", size="2.5%", pad=0.2);
plt.colorbar(cax=cax);
ax2 = plt.subplot(122);
black_rp = plt.imread(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_train_black.png".format(idx, labels_str) ))
plt.title("RP - BlackWhite");
plt.imshow(black_rp, origin="lower");

print('Saving processed data...')
# NOTE(review): "DIMESION" is a typo, but the CSV column names are consumed
# downstream under this exact spelling — keep it.
df_rp_train = pd.DataFrame(
    data = rp_series_train,
    columns = list(["IDX"]) + ["DIMESION_{}".format(d) for d in range(len(image.flatten()))] + list(label_columns_idx)
)
df_rp_train.to_csv(os.path.join( HYPOTHESIS_RESOURCES_PATH, "df_rp_train.csv"))
In [12]:
# Test...
# Load the pre-chunked aggregate power series and their binary labels
# produced by benchmarking 1 (test split).
test_power_chunks = np.load( os.path.join(BENCHMARKING1_RESOURCES_PATH, 'datasets/test_power_chunks.npy') )
test_labels_binary = np.load( os.path.join(BENCHMARKING1_RESOURCES_PATH, 'datasets/test_labels_binary.npy') )

# Sanity-check the series -> RP conversion on a single chunk.
# serie = list(np.sin(np.linspace(0,24,1000)))
serie = test_power_chunks[22, :].tolist()
image = RecurrencePlot().fit_transform([serie])[0]

# Visualize the recurrence plot next to the raw series.
fig = plt.figure(figsize=(15,5))
fig.tight_layout() # Or equivalently, "plt.tight_layout()"
# BUGFIX: the original created a full-width subplot (1, 1, 1) and then an
# overlapping half-width one (1, 2, 1); use a proper 1x2 grid instead.
ax = fig.add_subplot(1, 2, 1)
ax.imshow(image, origin="lower", aspect="auto")
ax = fig.add_subplot(1, 2, 2)
plt.plot(list(range(0,len(serie))), serie);
In [13]:
print("Processing test dataset (Series to Images)...")
image_size_px = 32
image_size_inch = round(image_size_px / 71, 2) # Convert px to inch (~71 px per inch)
# NOTE(review): `fig` is only used by the commented-out savefig branch below.
fig = plt.figure(frameon=False)
fig.set_size_inches(image_size_inch, image_size_inch) # 0.45inch = 32px
rp_series_test = []
# Serie -> RP: one recurrence-plot image per test power chunk.
X_rp = RecurrencePlot().fit_transform(test_power_chunks)
#for idx, row in tqdm_notebook(df_power_chunks.iterrows(), total = df_power_chunks.shape[0]):
for idx, power_chunk in tqdm_notebook(enumerate(test_power_chunks), total = test_power_chunks.shape[0]):
    serie = power_chunk
    image = X_rp[idx]
    # Binary ON/OFF labels are encoded into the file name, e.g. "12_010011011_test".
    labels = test_labels_binary[idx, :].astype('str').tolist()
    labels_str = ''.join(labels)
    # Persist image data files (PAA - noPAA)
    np.save(
        os.path.join(
            HYPOTHESIS_RESOURCES_PATH,
            "{}_{}_test.npy".format(idx, labels_str)
        ),
        image
    )
    # x is the array you want to save (colormapped PNG via matplotlib)
    imsave(
        os.path.join(
            HYPOTHESIS_RESOURCES_PATH,
            "{}_{}_test_color.png".format(idx, labels_str)
        ),
        arr=image
    )
    # ax = plt.Axes(fig, [0., 0., 1., 1.])
    # ax.set_axis_off()
    # fig.add_axes(ax)
    # ax.imshow(image, aspect='auto')
    # fig.savefig(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_test_color.png".format(idx, labels_str) ))
    # Un-colormapped copy, resized to 32x32 and saved as "..._test_black.png".
    Image.fromarray(image*255).convert('RGB').resize((image_size_px, image_size_px)).save(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_test_black.png".format(idx, labels_str) ))
    # Row layout: [idx, flattened image pixels..., labels...]
    rp_series_test.append( list([idx]) + list(image.flatten()) + list(labels) )

# Visualizing some results (uses the variables left over from the last loop
# iteration: idx, labels_str, image)...
plt.figure(figsize=(8,6));
ax1 = plt.subplot(121);
plt.title("RP - RGB");
color_rp = plt.imread(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_test_color.png".format(idx, labels_str) ))
plt.imshow(color_rp, origin="lower");
divider = make_axes_locatable(ax1);
cax = divider.append_axes("right", size="2.5%", pad=0.2);
plt.colorbar(cax=cax);
ax2 = plt.subplot(122);
black_rp = plt.imread(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_test_black.png".format(idx, labels_str) ))
plt.title("RP - BlackWhite");
plt.imshow(black_rp, origin="lower");

print('Saving processed data...')
# NOTE(review): "DIMESION" is a typo, but the CSV column names are consumed
# downstream under this exact spelling — keep it.
df_rp_test = pd.DataFrame(
    data = rp_series_test,
    columns = list(["IDX"]) + ["DIMESION_{}".format(d) for d in range(len(image.flatten()))] + list(label_columns_idx)
)
df_rp_test.to_csv(os.path.join( HYPOTHESIS_RESOURCES_PATH, "df_rp_test.csv"))
In [5]:
def metrics(test, predicted):
    """Classification metrics for one binary label.

    Args:
        test: ground-truth labels.
        predicted: predicted labels.

    Returns:
        Tuple (accuracy, precision, recall, f1, f1_macro). Precision, recall
        and f1 use scikit-learn's default (binary) averaging; f1_macro is the
        unweighted per-class average.
    """
    scores = (
        accuracy_score(test, predicted),
        precision_score(test, predicted),
        recall_score(test, predicted),
        f1_score(test, predicted),
        f1_score(test, predicted, average='macro'),
    )
    return scores


def plot_predicted_and_ground_truth(test, predicted):
    """Overlay the predicted and ground-truth series on one chart.

    NOTE(review): line labels are set but plt.legend() is never called here,
    so the legend only appears if the caller adds it.
    """
    for values, name in ((predicted, 'pred'), (test, 'Y')):
        plt.plot(values.flatten(), label=name)
    plt.show()
def embedding_images(images, model):
    """Extract DNN feature vectors for a list of recurrence-plot images.

    Args:
        images: iterable of image file paths. File names are expected to look
            like "<idx>_<binary labels>_... .png"; the label string is parsed
            from the name.
        model: Keras model used for feature extraction; its predict() output
            is flattened into one feature row per image.

    Returns:
        Tuple (X, y): X is an (n_images, n_features) float array of
        embeddings; y is a list of per-image label lists of ints (0/1).
    """
    vgg16_feature_list = []  # Attributes array (DNN embedding rows)
    y = []  # Labels parsed from the image file names
    for path in tqdm_notebook(images):
        img = keras_image.load_img(path, target_size=(100, 100))
        x = keras_image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        # "Extracting" features...
        # BUGFIX: use the `model` argument (the original ignored it and called
        # the global `vgg16_model`, defeating the purpose of the parameter).
        vgg16_feature = model.predict(x)
        vgg16_feature_list.append(np.array(vgg16_feature).flatten())
        # Parse labels from the file name, portably.
        # BUGFIX: the original split on "\\", which only strips the directory
        # on Windows; os.path.basename works on any platform.
        file_name = os.path.splitext(os.path.basename(path))[0]
        image_labels = [int(l) for l in list(file_name.split("_")[1])]
        y.append(image_labels)
    X = np.array(vgg16_feature_list)
    return X, y
In [3]:
# Building dnn model (feature extraction)
# include_top=False drops the classifier head and pooling='avg' global-average
# pools the last conv block, so predict() yields one feature vector per image.
# NOTE(review): `classes` is ignored when include_top=False.
vgg16_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(100, 100, 3),
    pooling='avg',
    classes=1000
)
In [6]:
# Colored recurrence plots generated for the TRAIN split; sorted so the
# embedding order is deterministic.
images = sorted(glob(
    os.path.join(
        HYPOTHESIS_RESOURCES_PATH,
        "*_train_color.png"
    )
))
X_train, y_train = embedding_images(images, vgg16_model)
# Data persistence
np.save( os.path.join(HYPOTHESIS_RESOURCES_PATH, 'X_train.npy'), X_train)
np.save( os.path.join(HYPOTHESIS_RESOURCES_PATH, 'y_train.npy'), y_train)
In [8]:
# Colored recurrence plots generated for the TEST split; sorted so the
# embedding order is deterministic.
images = sorted(glob(
    os.path.join(
        HYPOTHESIS_RESOURCES_PATH,
        "*_test_color.png"
    )
))
X_test, y_test = embedding_images(images, vgg16_model)
# Data persistence
np.save( os.path.join(HYPOTHESIS_RESOURCES_PATH, 'X_test.npy'), X_test)
np.save( os.path.join(HYPOTHESIS_RESOURCES_PATH, 'y_test.npy'), y_test)
In [9]:
# Training supervised classifier
# DecisionTreeClassifier accepts a 2-D multilabel target, so a single tree
# predicts the ON/OFF state of every appliance at once.
clf = DecisionTreeClassifier(max_depth=15)
# Train classifier
clf.fit(X_train, y_train)
# Save classifier for future use
#joblib.dump(clf, 'Tree'+'-'+device+'-redd-all.joblib')
Out[9]:
In [12]:
# Predict test data
y_pred = clf.predict(X_test)

# Print metrics
final_performance = []
y_test = np.array(y_test)
y_pred = np.array(y_pred)
print("")
print("RESULT ANALYSIS\n\n")
print("ON/OFF State Charts")
print("-" * 115)
# One ON/OFF chart and one metrics row per appliance (label column).
for i in range(y_test.shape[1]):
    fig = plt.figure(figsize=(15, 2))
    plt.title("Appliance #{}".format( label_columns_idx[i]))
    plt.plot(y_test[:, i].flatten(), label = "True Y")
    plt.plot( y_pred[:, i].flatten(), label = "Predicted Y")
    plt.xlabel('Sample')
    plt.xticks(range(0, y_test.shape[0], 50))
    plt.xlim(0, y_test.shape[0])
    plt.ylabel('Status')
    plt.yticks([0, 1])
    plt.ylim(0,1)
    plt.legend()
    plt.show()
    # Per-appliance metrics, reported as percentages.
    acc, prec, rec, f1, f1m = metrics(y_test[:, i], y_pred[:, i])
    final_performance.append([
        label_columns_idx[i],
        round(acc*100, 2),
        round(prec*100, 2),
        round(rec*100, 2),
        round(f1*100, 2),
        round(f1m*100, 2)
    ])
print("-" * 115)
print("")
print("FINAL PERFORMANCE BY APPLIANCE (LABEL):")
df_metrics = pd.DataFrame(
    data = final_performance,
    columns = ["Appliance", "Accuracy", "Precision", "Recall", "F1-score", "F1-macro"]
)
display(df_metrics)
print("")
print("OVERALL AVERAGE PERFORMANCE:")
# Average each metric across appliances (drop the name column first).
# NOTE(review): `final_performance` is rebound from list-of-rows to a 1-D
# array of means here.
final_performance = np.mean(np.array(final_performance)[:, 1:].astype(float), axis = 0)
display(pd.DataFrame(
    data = {
        "Metric": ["Accuracy", "Precision", "Recall", "F1-score", "F1-macro"],
        "Result (%)": [round(p, 2) for p in final_performance]
    }
))
# print("-----------------")
# print("Accuracy : {0:.2f}%".format( final_performance[0] ))
# print("Precision : {0:.2f}%".format( final_performance[1] ))
# print("Recall : {0:.2f}%".format( final_performance[2] ))
# print("F1-score : {0:.2f}%".format( final_performance[3] ))
# print("F1-macro : {0:.2f}%".format( final_performance[4] ))
# print("-----------------")
A adoção do gráfico de recorrência para o processo de classificação de série temporal, no contexto de NILM, apresentou desempenho superior ao método GAF (benchmarking 2).
Agora, o próximo passo é estruturar um pipeline único de comparação das 3 abordagens, que inclui desde o pré-processamento até a análise de resultados.
In [ ]: