Professor: André Paim Lemos
Autores: Leonardo Ferreira (leauferreira@gmail.com) e Petrônio Silva (petronio.candido@gmail.com)
In [1]:
import math

import matplotlib as plt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# migrate to sklearn.model_selection.KFold (the API changed) when updating.
from sklearn.cross_validation import KFold
%pylab inline
O objetivo desse estudo é gerar um modelo de predição de séries temporais para os conjuntos de dados Enrollments e TAIEX, obtido a partir da comparação do desempenho entre algumas das mais conhecidas técnicas existentes na literatura. É desejável, mas não imprescindível, que esse modelo seja analisável, com boa interpretabilidade das regras.
Para cada método de FTS implementado será investigado o seu desempenho para: \begin{enumerate} \item Os valores originais da série ($F(T)$) e as variações ($\Delta F(T)$) \item Diferentes partições de $U$ \item Variação nos parâmetros específicos de cada modelo \end{enumerate}
Serão escolhidos os parâmetros para cada modelo com menor erro percentual médio / erro quadrático médio e depois os diversos modelos serão comparados entre si.
In [2]:
# Erro quadrático médio
# Root Mean Squared Error
def rmse(predictions, targets):
    """Root Mean Squared Error between two aligned numeric arrays."""
    squared_residuals = np.square(predictions - targets)
    return np.sqrt(np.mean(squared_residuals))
# Erro Percentual médio
# Mean Absolute Percentage Error
def mape(predictions, targets):
    """Mean Absolute Percentage Error (as a fraction, not %).

    Fix: the error is now taken relative to the observed values
    (`targets`), per the standard MAPE definition — the previous
    revision divided by `predictions`.
    """
    return np.mean(np.abs(predictions - targets) / targets)
In [3]:
def plotComparedSeries(original, fts, title):
    """Plot a series against the one-step forecasts of a fitted FTS model.

    Parameters:
        original: the observed series (array-like)
        fts: a fitted model exposing predict(x)
        title: plot title

    Fix: removed the `error = rmse(...)` computation whose result was
    never used.
    """
    fig = plt.figure(figsize=[20, 6])
    ax = fig.add_subplot(111)
    predicted = [fts.predict(xx) for xx in original]
    ax.plot(original, color='b', label="Original")
    ax.plot(predicted, color='r', label="Predicted")
    handles0, labels0 = ax.get_legend_handles_labels()
    ax.legend(handles0, labels0)
    ax.set_title(title)
    ax.set_ylabel('F(T)')
    ax.set_xlabel('T')
    ax.set_xlim([0, len(original)])
    ax.set_ylim([min(original), max(original)])
In [109]:
def plotCompared(original, predicted, labels, title):
    """Plot the original series together with several predicted series,
    each labelled from the parallel `labels` list."""
    fig = plt.figure(figsize=[13, 6])
    ax = fig.add_subplot(111)
    ax.plot(original, color='k', label="Original")
    for idx, series in enumerate(predicted):
        ax.plot(series, label=labels[idx])
    handles0, labels0 = ax.get_legend_handles_labels()
    ax.legend(handles0, labels0)
    ax.set_title(title)
    ax.set_ylabel('F(T)')
    ax.set_xlabel('T')
    ax.set_xlim([0, len(original)])
    ax.set_ylim([min(original), max(original)])
In [5]:
# Scratch cell: quick numpy sanity check (the bare `a` echoes as cell output)
a = np.arange(0,5)
a
Out[5]:
In [101]:
def SelecaoKFold_MenorRMSE(original, parameters, modelo):
    """Grid-search the partition count for an FTS model under 5-fold cross
    validation, both on the raw series and on its first differences.

    Parameters:
        original: the time series (numpy array, so it can be fancy-indexed)
        parameters: list of partition counts to try
        modelo: an FTS class taking a single name argument

    Returns [best_model, best_predicted, best_diff_model, best_diff_predicted].

    Fixes over the previous revision:
    - the error matrices are built with float zeros; they used to be int,
      silently truncating every stored RMSE;
    - the per-fold model selection now compares np.mean(errors_fold) (the
      current fold only) instead of np.mean(errors), which averaged the
      whole, partially-filled matrix including its zero padding;
    - removed the unused `predictedd_best` and `bestd` locals.
    """
    nfolds = 5
    ret = []
    # RMSE per (fold, partition count); float dtype so values are not truncated
    errors = np.zeros((nfolds, len(parameters)))
    predicted_best = []
    print("Série Original")
    fig = plt.figure(figsize=[18, 10])
    fig.suptitle("Comparação de modelos ")
    ax0 = fig.add_axes([0, 0.5, 0.65, 0.45])  # left, bottom, width, height
    ax0.set_xlim([0, len(original)])
    ax0.set_ylim([min(original), max(original)])
    ax0.set_title('Série Temporal')
    ax0.set_ylabel('F(T)')
    ax0.set_xlabel('T')
    ax0.plot(original, label="Original")
    min_rmse_fold = 100000.0
    best = None
    fc = 0  # fold counter
    kf = KFold(len(original), n_folds=nfolds)
    for train_ix, test_ix in kf:
        train = original[train_ix]
        test = original[test_ix]
        min_rmse = 100000.0
        best_fold = None
        predicted_best_fold = []
        errors_fold = []
        pc = 0  # parameter counter
        for p in parameters:
            sets = GridPartitionerTrimf(train, p)
            fts = modelo(str(p) + " particoes")
            fts.learn(train, sets)
            predicted = [fts.predict(xx) for xx in test]
            error = rmse(np.array(predicted), np.array(test))
            errors_fold.append(error)
            print(fc, p, error)
            errors[fc, pc] = error
            if error < min_rmse:
                min_rmse = error
                best_fold = fts
                predicted_best_fold = predicted
            pc = pc + 1
        # re-predict over the whole series with this fold's winner, for the plot
        predicted_best_fold = [best_fold.predict(xx) for xx in original]
        ax0.plot(predicted_best_fold, label=best_fold.name)
        if np.mean(errors_fold) < min_rmse_fold:
            min_rmse_fold = np.mean(errors_fold)
            best = best_fold
            predicted_best = predicted_best_fold
        fc = fc + 1
    handles0, labels0 = ax0.get_legend_handles_labels()
    ax0.legend(handles0, labels0)
    ax1 = Axes3D(fig, rect=[0.7, 0.5, 0.3, 0.45], elev=30, azim=144)
    ax1.set_title('Comparação dos Erros Quadráticos Médios')
    ax1.set_zlabel('RMSE')
    ax1.set_xlabel('K-fold')
    ax1.set_ylabel('Partições')
    X, Y = np.meshgrid(np.arange(0, nfolds), parameters)
    surf = ax1.plot_surface(X, Y, errors.T, rstride=1, cstride=1, antialiased=True)
    ret.append(best)
    ret.append(predicted_best)
    # Differenced-series model
    print("\nSérie Diferencial")
    errors = np.zeros((nfolds, len(parameters)))
    ax2 = fig.add_axes([0, 0, 0.65, 0.45])  # left, bottom, width, height
    ax2.set_xlim([0, len(original)])
    ax2.set_ylim([min(original), max(original)])
    ax2.set_title('Série Temporal')
    ax2.set_ylabel('F(T)')
    ax2.set_xlabel('T')
    ax2.plot(original, label="Original")
    min_rmse_fold = 100000.0
    best = None
    fc = 0
    diff = diferencas(original)
    kf = KFold(len(original), n_folds=nfolds)
    for train_ix, test_ix in kf:
        train = diff[train_ix]
        test = diff[test_ix]
        min_rmse = 100000.0
        best_fold = None
        errors_fold = []
        pc = 0
        for p in parameters:
            sets = GridPartitionerTrimf(train, p)
            fts = modelo(str(p) + " particoes")
            fts.learn(train, sets)
            predicted = [fts.predictDiff(test, xx) for xx in np.arange(len(test))]
            error = rmse(np.array(predicted), np.array(test))
            print(fc, p, error)
            errors[fc, pc] = error
            errors_fold.append(error)
            if error < min_rmse:
                min_rmse = error
                best_fold = fts
            pc = pc + 1
        predicted_best_fold = [best_fold.predictDiff(original, xx) for xx in np.arange(len(original))]
        ax2.plot(predicted_best_fold, label=best_fold.name)
        if np.mean(errors_fold) < min_rmse_fold:
            min_rmse_fold = np.mean(errors_fold)
            best = best_fold
            predicted_best = predicted_best_fold
        fc = fc + 1
    handles0, labels0 = ax2.get_legend_handles_labels()
    ax2.legend(handles0, labels0)
    ax3 = Axes3D(fig, rect=[0.7, 0, 0.3, 0.45], elev=30, azim=144)
    ax3.set_title('Comparação dos Erros Quadráticos Médios')
    ax3.set_zlabel('RMSE')
    ax3.set_xlabel('K-fold')
    ax3.set_ylabel('Partições')
    X, Y = np.meshgrid(np.arange(0, nfolds), parameters)
    surf = ax3.plot_surface(X, Y, errors.T, rstride=1, cstride=1, antialiased=True)
    ret.append(best)
    ret.append(predicted_best)
    return ret
In [7]:
def SelecaoSimples_MenorRMSE(original,parameters,modelo):
    """Select, by smallest whole-series RMSE, the best partition count for
    `modelo`, fitted both to the raw series and to its first differences
    (via `diferencas`).

    Parameters:
        original: the time series (array-like)
        parameters: list of partition counts to try
        modelo: an FTS class taking a single name argument

    Returns [best_model, best_predicted, best_diff_model, best_diff_predicted].
    """
    ret = []
    errors = []
    predicted_best = []
    print("Série Original")
    fig = plt.figure(figsize=[20,12])
    fig.suptitle("Comparação de modelos ")
    ax0 = fig.add_axes([0, 0.5, 0.65, 0.45]) #left, bottom, width, height
    ax0.set_xlim([0,len(original)])
    ax0.set_ylim([min(original),max(original)])
    ax0.set_title('Série Temporal')
    ax0.set_ylabel('F(T)')
    ax0.set_xlabel('T')
    ax0.plot(original,label="Original")
    min_rmse = 100000.0
    best = None
    for p in parameters:
        sets = GridPartitionerTrimf(original,p)
        fts = modelo(str(p)+ " particoes")
        fts.learn(original,sets)
        # one-step forecast for every point of the series
        predicted = [fts.predict(xx) for xx in original]
        ax0.plot(predicted,label=fts.name)
        error = rmse(np.array(predicted),np.array(original))
        print(p,error)
        errors.append(error)
        if error < min_rmse:
            min_rmse = error
            best = fts
            predicted_best = predicted
    handles0, labels0 = ax0.get_legend_handles_labels()
    ax0.legend(handles0, labels0)
    # RMSE-vs-partition-count curve for the raw series
    ax1 = fig.add_axes([0.7, 0.5, 0.3, 0.45]) #left, bottom, width, height
    ax1.set_title('Comparação dos Erros Quadráticos Médios')
    ax1.set_ylabel('RMSE')
    ax1.set_xlabel('Quantidade de Partições')
    ax1.set_xlim([min(parameters),max(parameters)])
    ax1.plot(parameters,errors)
    ret.append(best)
    ret.append(predicted_best)
    # Differenced-series model
    print("\nSérie Diferencial")
    errors = []
    predictedd_best = []
    ax2 = fig.add_axes([0, 0, 0.65, 0.45]) #left, bottom, width, height
    ax2.set_xlim([0,len(original)])
    ax2.set_ylim([min(original),max(original)])
    ax2.set_title('Série Temporal')
    ax2.set_ylabel('F(T)')
    ax2.set_xlabel('T')
    ax2.plot(original,label="Original")
    min_rmse = 100000.0
    bestd = None
    for p in parameters:
        sets = GridPartitionerTrimf(diferencas(original),p)
        fts = modelo(str(p)+ " particoes")
        fts.learn(diferencas(original),sets)
        # forecasts start at t=1; seed t=0 with the first observed value
        predicted = [fts.predictDiff(original, xx) for xx in range(1,len(original))]
        predicted.insert(0,original[0])
        ax2.plot(predicted,label=fts.name)
        error = rmse(np.array(predicted),np.array(original))
        print(p,error)
        errors.append(error)
        if error < min_rmse:
            min_rmse = error
            bestd = fts
            predictedd_best = predicted
    handles0, labels0 = ax2.get_legend_handles_labels()
    ax2.legend(handles0, labels0)
    # RMSE-vs-partition-count curve for the differenced series
    ax3 = fig.add_axes([0.7, 0, 0.3, 0.45]) #left, bottom, width, height
    ax3.set_title('Comparação dos Erros Quadráticos Médios')
    ax3.set_ylabel('RMSE')
    ax3.set_xlabel('Quantidade de Partições')
    ax3.set_xlim([min(parameters),max(parameters)])
    ax3.plot(parameters,errors)
    ret.append(bestd)
    ret.append(predictedd_best)
    return ret
In [106]:
def compareModelsPlot(original, models_fo, models_ho):
    """Plot the predictions of all registered models on a single axis.

    Each entry of `models_fo` / `models_ho` is a dict with at least the
    keys "predicted" and "name". `original` is accepted for interface
    compatibility but is not drawn (as before).

    Fix: removed the unused `rows` list and `fts` locals, and merged the
    two identical loops.
    """
    fig = plt.figure(figsize=[13, 6])
    fig.suptitle("Comparação de modelos ")
    ax0 = fig.add_axes([0, 0, 1, 1])  # left, bottom, width, height
    for model in models_fo + models_ho:
        ax0.plot(model["predicted"], label=model["name"])
    handles0, labels0 = ax0.get_legend_handles_labels()
    ax0.legend(handles0, labels0)
def compareModelsTable(original, models_fo, models_ho):
    """Render a model-comparison table (matplotlib) and return the same
    table as LaTeX `tabular` source.

    Columns: model name, order, partition count, RMSE, MAPE (%).
    High-order models are scored only from t = order onwards, since they
    cannot forecast the first `order` points.

    Fix: removed the unused `footer` local.
    """
    fig = plt.figure(figsize=[12, 4])
    fig.suptitle("Comparação de modelos ")
    columns = ['Modelo', 'Ordem', 'Partições', 'RMSE', 'MAPE (%)']
    rows = []
    for model in models_fo:
        fts = model["model"]
        error_r = rmse(model["predicted"], original)
        error_m = round(mape(model["predicted"], original) * 100, 2)
        rows.append([model["name"], fts.order, len(fts.sets), error_r, error_m])
    for model in models_ho:
        fts = model["model"]
        # skip the warm-up window the high-order model cannot predict
        error_r = rmse(model["predicted"][fts.order:], original[fts.order:])
        error_m = round(mape(model["predicted"][fts.order:], original[fts.order:]) * 100, 2)
        rows.append([model["name"], fts.order, len(fts.sets), error_r, error_m])
    ax1 = fig.add_axes([0, 0, 1, 1])  # left, bottom, width, height
    ax1.set_xticks([])
    ax1.set_yticks([])
    ax1.table(cellText=rows,
              colLabels=columns,
              cellLoc='center',
              bbox=[0, 0, 1, 1])
    # Build the LaTeX source for the same table
    sup = "\\begin{tabular}{"
    header = ""
    body = ""
    for c in columns:
        sup = sup + "|c"
        if len(header) > 0:
            header = header + " & "
        header = header + "\\textbf{" + c + "} "
    sup = sup + "|} \\hline\n"
    header = header + "\\\\ \\hline \n"
    for r in rows:
        lin = ""
        for c in r:
            if len(lin) > 0:
                lin = lin + " & "
            lin = lin + str(c)
        body = body + lin + "\\\\ \\hline \n"
    return sup + header + body + "\\end{tabular}"
In [9]:
# Registries of fitted models per dataset, consumed later by
# compareModelsPlot / compareModelsTable.
# "fo" = first-order models, "ho" = high-order models.
enrolments_modelbase_fo = []
enrolments_modelbase_ho = []
taiex_modelbase_fo = []
taiex_modelbase_ho = []
A função de diferença serve para gerar um dataset que, ao invés do valor $F(t)$ original da série, terá a diferença entre dois valores consecutivos ou seja $\Delta F(t) = F(t-1) - F(t)$. O primeiro valor da série de diferenças é sempre considerado $0$.
Nesse tipo de série, o universo de discurso é dado por $U = [\min(\Delta F(t)),\max(\Delta F(t))]$
In [10]:
def diferencas(original):
    """Backward-difference series: Δ[0] = 0 and Δ[t] = F(t-1) - F(t)."""
    deltas = [0]  # by convention the first difference is zero
    for t in np.arange(1, len(original)):
        deltas.append(original[t - 1] - original[t])
    return np.array(deltas)
In [99]:
# Load the Enrollments dataset (semicolon-separated CSV), keep the
# "Enrollments" column as a plain numpy array, and plot the raw series.
enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
enrollments = np.array(enrollments["Enrollments"])
fig = plt.figure(figsize=[10,5])
plot(enrollments)  # `plot` is provided by the %pylab magic
Out[99]:
In [98]:
# Plot the first-difference transform of the enrollments series
fig = plt.figure(figsize=[10,5])
plot(diferencas(enrollments))
Out[98]:
In [97]:
# Load the TAIEX dataset and take rows 1..1999 of the "avg" column as the
# working sample, then plot it.
taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
taiexsample = np.array(taiex["avg"][1:2000])
fig = plt.figure(figsize=[10,5])
plot(taiexsample)
Out[97]:
In [96]:
# Plot the first differences of the TAIEX sample
fig = plt.figure(figsize=[10,5])
plot(diferencas(taiexsample))
#np.array(taiexsample)
Out[96]:
In [15]:
def trimf(x, parameters):
    """Triangular membership function; parameters = [a, b, c] are the
    left foot, peak and right foot of the triangle."""
    a, b, c = parameters[0], parameters[1], parameters[2]
    if a <= x < b:
        return (x - a) / (b - a)
    if b <= x <= c:
        return (c - x) / (c - b)
    return 0
def trapmf(x, parameters):
    """Trapezoidal membership function; parameters = [a, b, c, d] are the
    left foot, left shoulder, right shoulder and right foot."""
    a, b, c, d = parameters[0], parameters[1], parameters[2], parameters[3]
    if x < a or x > d:
        return 0
    if x < b:
        return (x - a) / (b - a)
    if x <= c:
        return 1
    return (d - x) / (d - c)
def gaussmf(x, parameters):
    """Gaussian membership function; parameters = [mean, sigma]."""
    z = (x - parameters[0]) / parameters[1]
    return math.exp(-0.5 * z ** 2)
def bellmf(x, parameters):
    """Generalized bell membership 1 / (1 + |(x-c)/a|^(2b));
    parameters = [a, b, c] (width, slope, center).

    Fix: the previous revision referenced an undefined name ``xx``
    instead of the argument ``x``, raising NameError on every call.
    """
    a, b, c = parameters[0], parameters[1], parameters[2]
    return 1 / (1 + abs((x - c) / a) ** (2 * b))
def sigmf(x, parameters):
    """Sigmoid membership function; parameters = [slope, midpoint]."""
    slope, midpoint = parameters[0], parameters[1]
    return 1 / (1 + math.exp(-slope * (x - midpoint)))
In [16]:
class FuzzySet:
    """A named fuzzy set: a membership function with its parameters, plus
    a crisp centroid value used for defuzzification."""

    def __init__(self, name, mf, parameters, centroid):
        self.name = name              # label, e.g. "A0"
        self.mf = mf                  # membership function f(x, parameters)
        self.parameters = parameters  # parameters forwarded to mf
        self.centroid = centroid      # crisp representative value

    def membership(self, x):
        """Degree of membership of x in this set."""
        return self.mf(x, self.parameters)

    def __str__(self):
        return self.name + ": " + str(self.mf) + "(" + str(self.parameters) + ")"
In [17]:
def GridPartitionerTrimf(data, npart, names=None, prefix="A"):
    """Partition the universe of discourse [min(data), max(data)] into
    `npart` evenly spaced, 50%-overlapping triangular fuzzy sets.

    `names` is accepted for interface compatibility but is not used.
    Returns a list of FuzzySet instances named prefix+index.
    """
    dmax = max(data)
    dmin = min(data)
    partlen = (dmax - dmin) / npart
    sets = []
    center = dmin
    for idx in range(npart):
        triangle = [center - partlen, center, center + partlen]
        sets.append(FuzzySet(prefix + str(idx), trimf, triangle, center))
        center = center + partlen
    return sets
In [18]:
# Smoke test: build 10 grid partitions over the enrollments series and
# print each resulting fuzzy set.
sts = GridPartitionerTrimf(enrollments,10)
for s in sts:
    print(s)
In [19]:
class FTS:
    """Base class for fuzzy-time-series models.

    Holds the fuzzy sets (name-keyed dict) and the FLRGs (rule groups);
    subclasses implement `learn` and `defuzzy`.
    """

    def __init__(self, order, name):
        self.sets = {}    # fuzzy sets, keyed by set name
        self.flrgs = {}   # fuzzy logical relationship groups, keyed by premiss
        self.order = order
        self.name = name

    def fuzzy(self, data):
        """Return {"fuzzyset", "membership"} for the set where `data` has
        the highest membership (ties go to the later set in iteration order)."""
        best = {"fuzzyset": "", "membership": 0.0}
        for key in self.sets:
            candidate = self.sets[key]
            degree = candidate.membership(data)
            if best["membership"] <= degree:
                best["fuzzyset"] = candidate.name
                best["membership"] = degree
        return best

    def defuzzy(self, data):
        # implemented by subclasses
        pass

    def learn(self, data, sets):
        # implemented by subclasses
        pass

    def predict(self, data):
        return self.defuzzy(data)

    def predictDiff(self, data, t):
        # forecast on the differenced scale, shifted back to the level scale
        return data[t] + self.defuzzy(data[t - 1] - data[t])

    def __str__(self):
        pieces = [self.name + ":\n"]
        for key in self.flrgs.keys():
            pieces.append(str(self.flrgs[key]) + "\n")
        return "".join(pieces)
In [ ]:
Os trabalhos pioneiros em séries temporais nebulosas são de \cite{song1993fuzzy} e a evolução apresentada por \cite{chen1996forecasting}. Nesses trabalhos define-se que o modelo de primeira ordem (\textit{First Order Fuzzy Time Series}) caracteriza-se por presumir que $F(t)$ é determinado unicamente por $F(t-1)$ ( ou $F(t-2)$ ou ... ou $F(t-m)$ ).
In [20]:
class FirstOrderFLRG:
    """First-order Fuzzy Logical Relationship Group (Chen style):
    premiss -> {consequents}, with consequents kept as a set, i.e.
    repetitions carry no weight."""

    def __init__(self, premiss):
        self.premiss = premiss
        self.consequent = set()

    def append(self, c):
        # set semantics: duplicates are ignored
        self.consequent.add(c)

    def __str__(self):
        body = ""
        for c in self.consequent:
            if body:
                body = body + ","
            body = body + c
        return self.premiss + " -> " + body
In [21]:
class FirstOrderFTS(FTS):
    """First-order FTS (Chen, 1996): F(t) depends only on F(t-1) and the
    forecast is the plain average of the rule's consequent centroids."""

    def __init__(self, name):
        super(FirstOrderFTS, self).__init__(1, name)

    def defuzzy(self, data):
        actual = self.fuzzy(data)
        key = actual["fuzzyset"]
        if key not in self.flrgs:
            # no rule for this premiss: fall back to its own centroid
            return self.sets[key].centroid
        flrg = self.flrgs[key]
        centroids = [self.sets[s].centroid for s in flrg.consequent]
        return sum(centroids) / float(len(centroids))

    def learn(self, data, sets):
        last = {"fuzzyset": "", "membership": 0.0}
        for s in sets:
            self.sets[s.name] = s
        self.flrgs = {}
        for count, inst in enumerate(data, start=1):
            actual = self.fuzzy(inst)
            # skip the first `order` points: no full premiss window yet
            if count > self.order:
                key = last["fuzzyset"]
                if key not in self.flrgs:
                    self.flrgs[key] = FirstOrderFLRG(key)
                self.flrgs[key].append(actual["fuzzyset"])
            last = actual
In [22]:
fts1,fts1p,ftsd1,ftsd1p = SelecaoSimples_MenorRMSE(enrollments,[4,8, 10,20, 30],FirstOrderFTS)
In [23]:
print(fts1)
In [24]:
plotCompared(enrollments,[fts1p,ftsd1p],["Normal","Diferenças"],'')
enrolments_modelbase_fo.append({"name":"FTS","model":fts1,"predicted":fts1p})
enrolments_modelbase_fo.append({"name":"FTS Dif.","model":ftsd1,"predicted":ftsd1p})
In [102]:
fts2,fts2p,ftsd2,ftsd2p= SelecaoKFold_MenorRMSE(taiexsample,[10,15,20,25,30,35],FirstOrderFTS)
In [26]:
print(ftsd2)
In [27]:
plotCompared(taiexsample,[fts2p,ftsd2p],["Normal","Diferenças"],'')
taiex_modelbase_fo.append({"name":"FTS","model":fts2,"predicted":fts2p})
taiex_modelbase_fo.append({"name":"FTS Dif.","model":ftsd2,"predicted":ftsd2p})
O trabalho de \cite{yu2005weighted} propõe um modelo - as \textit{Weighted Fuzzy Time Series} - em que os grupos de regras permitem repetições no consequente e tenham ponderação monotonicamente crescente, baseado na ordem cronológica dos termos no consequente. Nesse modelo as FLRG's permitem a repetição de conjuntos no consequente das regras, e os conjuntos devem ser apresentados em ordem cronológica.
In [28]:
class WeightedFLRG:
    """Weighted FLRG (Yu, 2005): consequents keep repetitions, in
    chronological order, and receive monotonically increasing weights
    1, 2, ..., k normalized by their sum.

    Fix: no longer (incorrectly) declared as a subclass of the FTS model
    base class — an FLRG is a rule group, not a model; it never called
    FTS.__init__ and the other FLRG classes have no base class.
    """

    def __init__(self, premiss):
        self.premiss = premiss
        self.consequent = []
        self.count = 1.0  # one more than the number of consequents

    def append(self, c):
        self.consequent.append(c)
        self.count = self.count + 1.0

    def weights(self):
        # weights 1..k normalized by their sum (k = len(self.consequent))
        tot = sum(np.arange(1.0, self.count, 1.0))
        return np.array([k / tot for k in np.arange(1.0, self.count, 1.0)])

    def __str__(self):
        tmp = self.premiss + " -> "
        tmp2 = ""
        cc = 1.0
        tot = sum(np.arange(1.0, self.count, 1.0))
        for c in self.consequent:
            if len(tmp2) > 0:
                tmp2 = tmp2 + ","
            tmp2 = tmp2 + c + "(" + str(cc / tot) + ")"
            cc = cc + 1.0
        return tmp + tmp2
In [29]:
class WeightedFTS(FTS):
    """First-order weighted FTS (Yu, 2005): the forecast is the
    chronologically weighted average of consequent centroids."""

    def __init__(self, name):
        super(WeightedFTS, self).__init__(1, name)

    def defuzzy(self, data):
        actual = self.fuzzy(data)
        key = actual["fuzzyset"]
        if key not in self.flrgs:
            # no rule for this premiss: fall back to its own centroid
            return self.sets[key].centroid
        flrg = self.flrgs[key]
        centroids = np.array([self.sets[s].centroid for s in flrg.consequent])
        return centroids.dot(flrg.weights())

    def learn(self, data, sets):
        last = {"fuzzyset": "", "membership": 0.0}
        for s in sets:
            self.sets[s.name] = s
        self.flrgs = {}
        for count, inst in enumerate(data, start=1):
            actual = self.fuzzy(inst)
            if count > self.order:
                key = last["fuzzyset"]
                if key not in self.flrgs:
                    self.flrgs[key] = WeightedFLRG(key)
                self.flrgs[key].append(actual["fuzzyset"])
            last = actual
In [30]:
wfts1,wfts1p,wftsd1,wftsd1p = SelecaoSimples_MenorRMSE(enrollments,[6,8,10,12,14], WeightedFTS)
In [31]:
print(wfts1)
In [32]:
plotCompared(enrollments,[wfts1p,wftsd1p],["Normal","Diferenças"],'')
enrolments_modelbase_fo.append({"name":"WFTS","model":wfts1,"predicted":wfts1p})
enrolments_modelbase_fo.append({"name":"WFTS Dif.","model":wftsd1,"predicted":wftsd1p})
In [33]:
wfts2,wfts2p,wftsd2,wftsd2p = SelecaoKFold_MenorRMSE(taiexsample,[2,5,10,15,20,25,30,35], WeightedFTS)
In [34]:
print(wfts2)
In [35]:
plotCompared(taiexsample,[wfts2p,wftsd2p],["Normal","Diferenças"],'')
taiex_modelbase_fo.append({"name":"WFTS","model":wfts2,"predicted":wfts2p})
taiex_modelbase_fo.append({"name":"WFTS Dif.","model":wftsd2,"predicted":wftsd2p})
Os trabalhos de \cite{ismail2011enrollment} e \cite{efendi2013improved} modificam a forma como os pesos são assinalados às regras no modelo de \cite{yu2005weighted}. A diferença mais importante é que a quantidade de recorrências de cada regra é que vai determinar o seu peso, para uma regra $A_i \rightarrow A_j,A_k$, tendo $A_i \rightarrow A_j$ $n_1$ recorrências e $A_i \rightarrow A_k$ $n_2$ recorrências, o valor de cada peso será $w_k = n_k / \sum_{i=1..n} n_i$, sendo $n$ o número de regras.
In [36]:
class ImprovedWeightedFLRG:
    """FLRG with recurrence-based weights (Ismail & Efendi): each
    consequent's weight is its recurrence count over all recurrences."""

    def __init__(self, premiss):
        self.premiss = premiss
        self.consequent = {}  # consequent name -> recurrence count
        self.count = 0.0      # total number of recurrences

    def append(self, c):
        self.consequent[c] = self.consequent.get(c, 0.0) + 1.0
        self.count = self.count + 1.0

    def weights(self):
        return np.array([self.consequent[c] / self.count
                         for c in self.consequent.keys()])

    def __str__(self):
        parts = []
        for c in self.consequent.keys():
            parts.append(c + "(" + str(self.consequent[c] / self.count) + ")")
        return self.premiss + " -> " + ",".join(parts)
In [37]:
class ImprovedWeightedFTS(FTS):
    """First-order FTS with recurrence-based rule weights
    (Ismail & Efendi / Efendi et al.)."""

    def __init__(self, name):
        super(ImprovedWeightedFTS, self).__init__(1, name)

    def defuzzy(self, data):
        actual = self.fuzzy(data)
        key = actual["fuzzyset"]
        if key not in self.flrgs:
            # no rule for this premiss: fall back to its own centroid
            return self.sets[key].centroid
        flrg = self.flrgs[key]
        centroids = np.array([self.sets[s].centroid
                              for s in flrg.consequent.keys()])
        return centroids.dot(flrg.weights())

    def learn(self, data, sets):
        last = {"fuzzyset": "", "membership": 0.0}
        for s in sets:
            self.sets[s.name] = s
        self.flrgs = {}
        for count, inst in enumerate(data, start=1):
            actual = self.fuzzy(inst)
            if count > self.order:
                key = last["fuzzyset"]
                if key not in self.flrgs:
                    self.flrgs[key] = ImprovedWeightedFLRG(key)
                self.flrgs[key].append(actual["fuzzyset"])
            last = actual
In [38]:
#fts = ImprovedWeightedFTS()
#sets = GridPartitionerTrimf(enrollments["Enrollments"],8)
#fts.learn(enrollments["Enrollments"],sets)
iwfts1,iwfts1p,iwftsd1,iwftsd1p = SelecaoSimples_MenorRMSE(enrollments,[6,8,10,12,14], ImprovedWeightedFTS)
In [39]:
print(iwfts1)
In [40]:
plotCompared(enrollments,[iwfts1p,iwftsd1p],["Normal","Diferenças"],'')
enrolments_modelbase_fo.append({"name":"IWFTS","model":iwfts1,"predicted":iwfts1p})
enrolments_modelbase_fo.append({"name":"IWFTS Dif.","model":iwftsd1,"predicted":iwftsd1p})
In [41]:
iwfts2,iwfts2p,iwftsd2,iwftsd2p = SelecaoKFold_MenorRMSE(taiexsample,[2,5,10,15,20,25,30,35], ImprovedWeightedFTS)
In [42]:
print(iwfts2)
In [43]:
plotCompared(taiexsample,[iwfts2p,iwftsd2p],["Normal","Diferenças"],'')
taiex_modelbase_fo.append({"name":"IWFTS","model":iwfts2,"predicted":iwfts2p})
taiex_modelbase_fo.append({"name":"IWFTS Dif.","model":iwftsd2,"predicted":iwftsd2p})
O método EWFTS - \textit{Exponentialy Weighted Fuzzy Time Series}, é utilizado em \cite{sadaei2014short}, e contrasta com o crescimento linear dos pesos proposto por \cite{yu2005weighted} modificando-os por um crescimento exponencial. Dada a FLRG $A_i \rightarrow A_1,A_3,...,A_k$, a matriz $M(t) = [m_1, m_2, ...,m_k]$ dos centros de $A_1,A_3,...,A_k$ a matriz de pesos $w(t)$ será definida por um parâmetro $c$, que formará a série exponencial $c^0, c^1, ..., c^{k-1}$ normalizada. O parâmetro $c$ deve ser maior do que zero.
$$ w(t) = \left[ \frac{1}{\sum_{h=1}^k c^{h-1}},\frac{c^1}{\sum_{h=1}^k c^{h-1}},...,\frac{c^{k-1}}{\sum_{h=1}^k c^{h-1}} \right] $$
In [44]:
class ExponentialyWeightedFLRG:
    """FLRG with exponentially growing weights c^0, c^1, ..., c^(k-1),
    normalized by their sum (Sadaei et al., 2014)."""

    def __init__(self, premiss, c):
        self.premiss = premiss
        self.consequent = []
        self.count = 0.0
        self.c = c  # exponential base of the weight series

    def append(self, c):
        self.consequent.append(c)
        self.count = self.count + 1.0

    def weights(self):
        raw = [self.c ** k for k in np.arange(0.0, self.count, 1.0)]
        tot = sum(raw)
        return np.array([w / tot for w in raw])

    def __str__(self):
        raw = [self.c ** k for k in np.arange(0.0, self.count, 1.0)]
        tot = sum(raw)
        parts = []
        for i, cons in enumerate(self.consequent):
            parts.append(cons + "(" + str(raw[i] / tot) + ")")
        return self.premiss + " -> " + ",".join(parts)
In [45]:
class ExponentialyWeightedFTS(FTS):
    """First-order exponentially weighted FTS (Sadaei et al., 2014).

    Generalization: the exponential base `c` used to be hard-coded to 2
    inside `learn`; it is now a constructor parameter (default 2, so all
    existing callers behave exactly as before).
    """

    def __init__(self, name, c=2):
        super(ExponentialyWeightedFTS, self).__init__(1, name)
        self.c = c  # exponential weight base; the text requires c > 0

    def defuzzy(self, data):
        actual = self.fuzzy(data)
        key = actual["fuzzyset"]
        if key not in self.flrgs:
            # no rule for this premiss: fall back to its own centroid
            return self.sets[key].centroid
        flrg = self.flrgs[key]
        mi = np.array([self.sets[s].centroid for s in flrg.consequent])
        return mi.dot(flrg.weights())

    def learn(self, data, sets):
        last = {"fuzzyset": "", "membership": 0.0}
        for s in sets:
            self.sets[s.name] = s
        self.flrgs = {}
        count = 1
        for inst in data:
            actual = self.fuzzy(inst)
            if count > self.order:
                key = last["fuzzyset"]
                if key not in self.flrgs:
                    self.flrgs[key] = ExponentialyWeightedFLRG(key, self.c)
                self.flrgs[key].append(actual["fuzzyset"])
            count = count + 1
            last = actual
In [46]:
ewfts1,ewfts1p,ewftsd1,ewftsd1p = SelecaoSimples_MenorRMSE(enrollments,[6,8,10,12,14], ExponentialyWeightedFTS)
In [47]:
print(ewfts1)
In [48]:
plotCompared(enrollments,[ewfts1p,ewftsd1p],["Normal","Diferenças"],'')
enrolments_modelbase_fo.append({"name":"EWFTS","model":ewfts1,"predicted":ewfts1p})
enrolments_modelbase_fo.append({"name":"EWFTS Dif.","model":ewftsd1,"predicted":ewftsd1p})
In [129]:
# Fix: this cell previously ran ImprovedWeightedFTS, so the results stored
# under the "EWFTS" name were duplicates of the IWFTS experiment.
ewfts2,ewfts2p,ewftsd2,ewftsd2p = SelecaoKFold_MenorRMSE(taiexsample,[2,5,10,15,20,25,30,35], ExponentialyWeightedFTS)
In [50]:
print(ewfts2)
In [51]:
plotCompared(taiexsample,[ewfts2p,ewftsd2p],["Normal","Diferenças"],'')
taiex_modelbase_fo.append({"name":"EWFTS","model":ewfts2,"predicted":ewfts2p})
taiex_modelbase_fo.append({"name":"EWFTS Dif.","model":ewftsd2,"predicted":ewftsd2p})
Essa implementação segue os trabalho de \cite{hwang1998handling} e \cite{chen2014high}. Esses modelos se diferenciam dos modelos de primeira ordem pelo uso de $m$ preditores em conjunto, $F(t-1),...,F(t-m)$ tal que $F(t)$ é determinado por $F(t-1)$ e $F(t-2)$ e ... e $F(t-m)$. Regras nesse estilo devem ser interpretadas como SE $F(t-1)$ E $F(t-2)$ E ... E $F(t-m)$ ENTÃO $F(t)$.
O ponto focal desse modelo é a escolha da quantidade $w$ de defasagens de tempo que serão utilizadas na predição, a chamada janela base. Com $w$ serão desenvolvidas o vetor de critérios $C(t)$ e a matriz de operação $O^w(t)$ a partir das quais se obterá a matriz das relações $R(t)$. O vetor $C(t)$ contém as pertinências
\begin{equation} \begin{split} C(t) = \left[ \begin{array}{cccc} C_1 & C_2 & ... & C_k \end{array} \right] \\ C_i = f_{ai}( F(t-1) ) \quad 1 \leq i \leq k \end{split} \end{equation}\begin{equation} \begin{split} O^w(t) = \left[ \begin{array}{cccc} O_{11} & O_{12} & ... & O_{1k} \\ ... \\ O_{w1} & O_{w2} & ... &O_{wk} \end{array} \right] \\ O_{ji} = f_{ai}( F(t-j) ) \quad 1 \leq i \leq k \quad 1 \leq j \leq w \end{split} \end{equation}\begin{equation} \begin{split} R(t) = O^w(t) \times C(t) \\ R_{ji} = O_{ij} \times C_j \quad 1 \leq i \leq k \quad 1 \leq j \leq w \\ R(t) = \left[ \begin{array}{cccc} R_{11} & R_{12} & ... & R_{1k} \\ ... \\ R_{w1} & R_{w2} & ... & R_{wk} \end{array} \right] \end{split} \end{equation}\begin{equation} F(t) = \left[ \begin{array}{cccc} \max(R_{11} & R_{21} & ... & R_{w1}) \\ ... \\ \max(R_{1k} & R_{2k} & ... & R_{wk}) \end{array} \right] \end{equation}As pertinências de $F(t)$ para os $k$ conjuntos nebulosos na fórmula. Para deffuzificar o valor a partir de dos conjuntos, utiliza-se a mesma metodologia dos modelos de primeira ordem.
In [52]:
class HighOrderFTS(FTS):
    """High-order FTS (Hwang et al., 1998): forecasts F(t) from the
    memberships of a window of previous observations, combining the
    criteria vector C(t) and the operation matrix O^w(t) into the
    relation matrix R(t), then defuzzifying by centroid average."""
    def __init__(self,order,name):
        # `order` plays the role of the base-window size w (+1)
        super(HighOrderFTS, self).__init__(order,name)
    def defuzzy(self,data,t):
        # C(t): membership of data[t] in each of the k fuzzy sets
        cn = np.array([0.0 for k in range(len(self.sets))])
        # O^w(t): memberships of the lagged values, one row per lag
        ow = np.array([[0.0 for k in range(len(self.sets))] for z in range(self.order-1)])
        # R(t): element-wise product of O^w(t) rows with C(t)
        rn = np.array([[0.0 for k in range(len(self.sets))] for z in range(self.order-1)])
        # fuzzy forecast of F(t): column-wise max of R(t)
        ft = np.array([0.0 for k in range(len(self.sets))])
        for s in range(len(self.sets)):
            cn[s] = self.sets[s].membership(data[t])
            for w in range(self.order-1):
                # NOTE(review): w starts at 0, so the first "lagged" value is
                # data[t] itself (duplicating C(t)) rather than data[t-1] —
                # confirm this matches the intended formulation.
                ow[w,s] = self.sets[s].membership(data[t-w])
                rn[w,s] = ow[w,s] * cn[s]
                ft[s] = max(ft[s],rn[w,s])
        # defuzzify: average the centroids of all sets tied at max membership
        mft = max(ft)
        out = 0.0
        count = 0.0
        for s in range(len(self.sets)):
            if ft[s] == mft:
                out = out + self.sets[s].centroid
                count = count + 1.0
        return out / count
    def learn(self, data, sets):
        # Only stores the partition; rules are evaluated lazily in defuzzy.
        # NOTE: `sets` is kept as a list here (indexed by position), unlike
        # the name-keyed dict used by the first-order models.
        self.sets = sets
    def predict(self,data,t):
        return self.defuzzy(data,t)
    def predictDiff(self,data,t):
        # forecast on the differenced series, then shift back to the level
        return data[t] + self.defuzzy(diferencas(data),t)
In [53]:
def HOSelecaoSimples_MenorRMSE(original, parameters, orders):
    """Grid-search partition count and window size (order) for the
    high-order FTS, on the raw series and on its first differences.

    Parameters:
        original: the time series (array-like)
        parameters: partition counts to try
        orders: window sizes to try

    Returns [best_model, best_predicted, best_diff_model, best_diff_predicted].

    Fix: the error matrices are built with float zeros; they used to be
    int, silently truncating every stored RMSE.
    """
    ret = []
    # RMSE per (order, partition count); float dtype so values are not truncated
    errors = np.zeros((len(orders), len(parameters)))
    predicted_best = []
    print("Série Original")
    fig = plt.figure(figsize=[20, 12])
    fig.suptitle("Comparação de modelos ")
    ax0 = fig.add_axes([0, 0.5, 0.6, 0.45])  # left, bottom, width, height
    ax0.set_xlim([0, len(original)])
    ax0.set_ylim([min(original), max(original)])
    ax0.set_title('Série Temporal')
    ax0.set_ylabel('F(T)')
    ax0.set_xlabel('T')
    ax0.plot(original, label="Original")
    min_rmse = 100000.0
    best = None
    pc = 0
    for p in parameters:
        oc = 0
        for o in orders:
            sets = GridPartitionerTrimf(original, p)
            fts = HighOrderFTS(o, "k = " + str(p) + " w = " + str(o))
            fts.learn(original, sets)
            # high-order models can only forecast from t = o onwards
            predicted = [fts.predict(original, xx) for xx in range(o, len(original))]
            error = rmse(np.array(predicted), np.array(original[o:]))
            # left-pad with None so the plotted forecasts align with the series
            for kk in range(o):
                predicted.insert(0, None)
            ax0.plot(predicted, label=fts.name)
            print(o, p, error)
            errors[oc, pc] = error
            if error < min_rmse:
                min_rmse = error
                best = fts
                predicted_best = predicted
            oc = oc + 1
        pc = pc + 1
    handles0, labels0 = ax0.get_legend_handles_labels()
    ax0.legend(handles0, labels0)
    ax1 = Axes3D(fig, rect=[0.6, 0.5, 0.45, 0.45], elev=30, azim=144)
    ax1.set_title('Comparação dos Erros Quadráticos Médios por tamanho da janela')
    ax1.set_ylabel('RMSE')
    ax1.set_xlabel('Quantidade de Partições')
    ax1.set_zlabel('W')
    X, Y = np.meshgrid(parameters, orders)
    surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
    ret.append(best)
    ret.append(predicted_best)
    # Differenced-series model
    print("\nSérie Diferencial")
    errors = np.zeros((len(orders), len(parameters)))
    predictedd_best = []
    ax2 = fig.add_axes([0, 0, 0.6, 0.45])  # left, bottom, width, height
    ax2.set_xlim([0, len(original)])
    ax2.set_ylim([min(original), max(original)])
    ax2.set_title('Série Temporal')
    ax2.set_ylabel('F(T)')
    ax2.set_xlabel('T')
    ax2.plot(original, label="Original")
    min_rmse = 100000.0
    bestd = None
    pc = 0
    for p in parameters:
        oc = 0
        for o in orders:
            # partitions are built on the differenced universe of discourse
            sets = GridPartitionerTrimf(diferencas(original), p)
            fts = HighOrderFTS(o, "k = " + str(p) + " w = " + str(o))
            fts.learn(original, sets)
            predicted = [fts.predictDiff(original, xx) for xx in range(o, len(original))]
            error = rmse(np.array(predicted), np.array(original[o:]))
            for kk in range(o):
                predicted.insert(0, None)
            ax2.plot(predicted, label=fts.name)
            print(o, p, error)
            errors[oc, pc] = error
            if error < min_rmse:
                min_rmse = error
                bestd = fts
                predictedd_best = predicted
            oc = oc + 1
        pc = pc + 1
    handles0, labels0 = ax2.get_legend_handles_labels()
    ax2.legend(handles0, labels0)
    ax3 = Axes3D(fig, rect=[0.6, 0.0, 0.45, 0.45], elev=30, azim=144)
    ax3.set_title('Comparação dos Erros Quadráticos Médios')
    ax3.set_ylabel('RMSE')
    ax3.set_xlabel('Quantidade de Partições')
    ax3.set_zlabel('W')
    X, Y = np.meshgrid(parameters, orders)
    surf = ax3.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
    ret.append(bestd)
    ret.append(predictedd_best)
    return ret
In [126]:
hofts1,hofts1p,hoftsd1,hoftsd1p = HOSelecaoSimples_MenorRMSE(enrollments,[4,8,10,12],[2,3,4,5,6])
In [127]:
plotCompared(enrollments,[hofts1p,hoftsd1p],["Normal","Diferenças"],'')
enrolments_modelbase_ho.append({"name":"HOFTS","model":hofts1,"predicted":hofts1p})
enrolments_modelbase_ho.append({"name":"HOFTS Dif.","model":hoftsd1,"predicted":hoftsd1p})
In [56]:
hofts2,hofts2p,hoftsd2,hoftsd2p = HOSelecaoSimples_MenorRMSE(taiexsample,[5,10,15,20,25],[2,3,4,5,6])
In [57]:
plotCompared(taiexsample,[hofts2p,hoftsd2p],["Normal","Diferenças"],'')
taiex_modelbase_ho.append({"name":"HOFTS","model":hofts2,"predicted":hofts2p})
taiex_modelbase_ho.append({"name":"HOFTS Dif.","model":hoftsd2,"predicted":hoftsd2p})
In [108]:
compareModelsPlot(enrollments,enrolments_modelbase_fo,enrolments_modelbase_ho)
In [86]:
tab = compareModelsTable(enrollments,enrolments_modelbase_fo,enrolments_modelbase_ho)
In [113]:
plotCompared(enrollments,[hofts1p],['HOFTS'],'')
In [115]:
print(ftsd1)
In [128]:
for s in hofts1.sets:
print(s)
In [107]:
#taiex_modelbase_fo
compareModelsPlot(taiexsample,taiex_modelbase_fo,taiex_modelbase_ho)
In [89]:
tab = compareModelsTable(taiexsample,taiex_modelbase_fo,taiex_modelbase_ho)
In [121]:
plotCompared(taiexsample,[wftsd2p],['WFTS Dif'],'')
In [135]:
#for s in wfts2.sets: print(wfts2.sets[s])
print(iwfts2)
(Song and Chissom, 1993) Song Qiang and Chissom Brad S, ``Fuzzy time series and its models'', Fuzzy sets and systems, vol. 54, number 3, pp. 269--277, 1993.
(Chen, 1996) Chen Shyi-Ming, ``Forecasting enrollments based on fuzzy time series'', Fuzzy sets and systems, vol. 81, number 3, pp. 311--319, 1996.
(Ismail and Efendi, 2011) Ismail Zuhaimy and Efendi R, ``Enrollment forecasting based on modified weight fuzzy time series'', Journal of Artificial Intelligence, vol. 4, number 1, pp. 110--118, 2011.
(Efendi, Ismail et al., 2013) Efendi Riswan, Ismail Zuhaimy and Deris Mustafa Mat, ``IMPROVED WEIGHT FUZZY TIME SERIES AS USED IN THE EXCHANGE RATES FORECASTING OF US DOLLAR TO RINGGIT MALAYSIA'', International Journal of Computational Intelligence and Applications, vol. 12, number 01, pp. 1350005, 2013.
(Yu, 2005) Yu Hui-Kuang, ``Weighted fuzzy time series models for TAIEX forecasting'', Physica A: Statistical Mechanics and its Applications, vol. 349, number 3, pp. 609--624, 2005.
(Sadaei, Enayatifar et al., 2014) Sadaei Hossein Javedani, Enayatifar Rasul, Abdullah Abdul Hanan et al., ``Short-term load forecasting using a hybrid model with a refined exponentially weighted fuzzy time series and an improved harmony search'', International Journal of Electrical Power \& Energy Systems, vol. 62, number , pp. 118--129, 2014.
(Hwang, Chen et al., 1998) Hwang Jeng-Ren, Chen Shyi-Ming and Lee Chia-Hoang, ``Handling forecasting problems using fuzzy time series'', Fuzzy sets and systems, vol. 100, number 1, pp. 217--228, 1998.
(Chen, 2014) Chen Mu-Yen, ``A high-order fuzzy time series forecasting model for internet stock trading'', Future Generation Computer Systems, vol. 37, number , pp. 461--467, 2014.
In [ ]: