In [1]:
# -*- coding: utf-8 -*-
class Dtw(object):
    """Dynamic time warping between two numeric sequences with custom step patterns.

    Each step pattern is a ``(di, dj)`` offset from the current cell ``(i, j)``
    to its predecessor cell. The matching entry of ``weights`` is a dict that
    maps ``(di, dj)`` offsets (relative to the current cell) to the weight of
    the local distance at that cell; the weighted sum is the cost of the step.

    Parameters:
        seq1, seq2: indexable numeric sequences.
        patterns:   list of predecessor offsets; defaults to
                    ``[(-1,-1), (-1,0), (0,-1)]``.
        weights:    list of weight dicts, one per pattern; defaults to
                    ``[{(0,0):2}, {(0,0):1}, {(0,0):1}]``.
        band_r:     half-width of the band around ``i == j`` expressed as a
                    fraction of ``len(seq1)``. ``None`` disables the band.
    """

    def __init__(self, seq1, seq2, patterns=None, weights=None, band_r=0.005):  # EDIT HERE
        # Defaults are created per call so they cannot be shared between instances.
        if patterns is None:
            patterns = [(-1, -1), (-1, 0), (0, -1)]
        if weights is None:
            weights = [{(0, 0): 2}, {(0, 0): 1}, {(0, 0): 1}]
        assert len(patterns) == len(weights)
        self._seq1 = seq1
        self._seq2 = seq2
        self.len_seq1 = len(seq1)
        self.len_seq2 = len(seq2)
        self.len_pattern = len(patterns)
        # Total weight of each pattern, used to normalise the final distance.
        self.sum_w = [sum(ws.values()) for ws in weights]
        if band_r is None:
            # No band: a radius this large makes every cell of the matrix eligible.
            self._r = max(self.len_seq1, self.len_seq2)
        else:
            self._r = int(self.len_seq1 * band_r)
        self._patterns = patterns
        self._weights = weights

    def get_distance(self, i1, i2):
        """Return the local distance ``|seq1[i1] - seq2[i2]|``."""
        return abs(self._seq1[i1] - self._seq2[i2])

    def _weighted_distance(self, i, j, pat_i):
        """Return the weighted local cost of reaching ``(i, j)`` via pattern ``pat_i``.

        Returns ``None`` when a weighted cell lies outside the matrix, so the
        caller skips the step instead of letting a negative index wrap around.
        """
        dist = 0
        for w_coor_offset, d_w in self._weights[pat_i].items():
            w_i = i + w_coor_offset[0]
            w_j = j + w_coor_offset[1]
            if not (0 <= w_i < self.len_seq1 and 0 <= w_j < self.len_seq2):
                return None
            dist += d_w * self.get_distance(w_i, w_j)
        return dist

    @staticmethod
    def _normalise(total, weight):
        """Divide the accumulated distance by the accumulated weight, safely."""
        if weight:
            # float() keeps Python 2 from floor-dividing integer inputs.
            return total / float(weight)
        if total == float('inf'):
            # The end cell was never reached (e.g. it lies outside the band).
            return total
        # No step weight was accumulated: only the start cell contributed, and
        # it is seeded with a weight of 2 in calculate().
        return total / 2.0

    def calculate(self):
        """Fill the accumulated-distance and accumulated-weight matrices.

        Returns:
            ``(distance, g, cost)`` where ``g[i][j]`` is the smallest
            accumulated distance found for cell ``(i, j)`` (``inf`` if never
            reached), ``cost[i][j]`` is the sum of pattern weights along that
            path, and ``distance`` is ``g`` normalised by ``cost`` at the last
            cell (``inf`` when the last cell is unreachable).

        Raises:
            ValueError: if either sequence is empty.
        """
        rows, cols = self.len_seq1, self.len_seq2
        if rows == 0 or cols == 0:
            raise ValueError("Dtw needs two non-empty sequences")
        inf = float('inf')
        g = [[inf] * cols for _ in range(rows)]
        cost = [[0] * cols for _ in range(rows)]
        # NOTE(review): the start cell is seeded with weight 2 but cost[0][0]
        # stays 0, so that weight is not part of the normaliser - confirm intended.
        g[0][0] = 2 * self.get_distance(0, 0)
        for i in range(rows):
            # Only cells within self._r columns of the diagonal i == j are visited.
            for j in range(max(0, i - self._r), min(i + self._r + 1, cols)):
                for pat_i in range(self.len_pattern):
                    prev_i = i + self._patterns[pat_i][0]
                    prev_j = j + self._patterns[pat_i][1]
                    if prev_i < 0 or prev_j < 0:
                        continue
                    prev_val = g[prev_i][prev_j]
                    if prev_val == inf:
                        # An unreached predecessor can never improve this cell.
                        continue
                    dist = self._weighted_distance(i, j, pat_i)
                    if dist is None:
                        continue
                    this_val = prev_val + dist
                    if this_val < g[i][j]:
                        g[i][j] = this_val
                        cost[i][j] = cost[prev_i][prev_j] + self.sum_w[pat_i]
        distance = self._normalise(g[rows - 1][cols - 1], cost[rows - 1][cols - 1])
        return distance, g, cost

    def print_table(self, tb):
        """Print matrix ``tb`` (len_seq1 x len_seq2) with row and column indices."""
        print(' ' + ' '.join(["{:^7d}".format(j) for j in range(self.len_seq2)]))
        for i in range(self.len_seq1):
            line = "{:^4d}: ".format(i)
            for j in range(self.len_seq2):
                line += "{:^7.3f} ".format(tb[i][j])
            print(line)

    def print_g_matrix(self):
        """Recompute and print the accumulated-distance matrix."""
        _, tb, _ = self.calculate()
        self.print_table(tb)

    def print_cost_matrix(self):
        """Recompute and print the accumulated-weight matrix."""
        _, _, tb = self.calculate()
        self.print_table(tb)

    def get_dtw(self):
        """Return only the normalised DTW distance."""
        ans, _, _ = self.calculate()
        return ans
In [2]:
import csv
import random
import math
import operator
import numpy as np
def loadDataset(filename, data=None):
    """Read a space-delimited numeric file and append one float list per line to ``data``.

    Empty fields (produced by repeated or trailing spaces) are dropped before
    conversion, so a blank line contributes an empty list.

    Parameters:
        filename: path of the file to read.
        data:     list to extend in place; a new list is created when omitted.

    Returns:
        The list that received the rows (``data`` itself when it was given).

    Raises:
        ValueError: if a non-empty field cannot be converted to float.
    """
    import sys  # local import: only needed to pick the file mode below

    if data is None:
        # A fresh list per call; a mutable default would be shared between calls.
        data = []
    # csv.reader wants a binary file on Python 2 and a text file opened with
    # newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csvfile = open(filename, 'r', newline='')
    else:
        csvfile = open(filename, 'rb')
    with csvfile:
        for row in csv.reader(csvfile, delimiter=' '):
            data.append([float(field) for field in row if field])
    return data
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over positions ``1 .. length-1``.

    Position 0 is left out of the sum (the callers in this notebook keep the
    class label there).
    """
    squared_sum = sum(
        pow(instance1[pos] - instance2[pos], 2) for pos in range(1, length)
    )
    return math.sqrt(squared_sum)
def getNeighbors(trainingSet, testInstance, k, pattern, weight, r_band=None):
    """Return the (at most) ``k`` training rows closest to ``testInstance`` under DTW.

    Element 0 of every row is skipped (it holds the class label); the rest is
    compared with ``Dtw(...).get_dtw()``.

    Parameters:
        trainingSet:  sequence of rows ``[label, v1, v2, ...]``.
        testInstance: one row in the same layout.
        k:            number of neighbours wanted; fewer are returned when the
                      training set is smaller, none when ``k <= 0``.
        pattern, weight: step patterns and weights forwarded to ``Dtw``.
        r_band:       forwarded unchanged to ``Dtw`` as ``band_r``.

    Returns:
        List of training rows ordered from nearest to farthest; rows with
        equal distance keep their training-set order.
    """
    query = testInstance[1:]
    distances = []
    for candidate in trainingSet:
        dist = Dtw(query, candidate[1:], pattern, weight, r_band).get_dtw()
        distances.append((candidate, dist))
    distances.sort(key=operator.itemgetter(1))
    # Slicing (instead of indexing 0..k-1) tolerates k > len(trainingSet).
    return [pair[0] for pair in distances[:max(k, 0)]]
def getResponse(neighbors):
    """Return the majority class label among ``neighbors``.

    The label is element 0 of each neighbour row. When several labels share
    the highest count, the one that appears first in ``neighbors`` wins.

    Raises:
        IndexError: if ``neighbors`` is empty.
    """
    votes = {}
    first_seen = []  # labels in order of first appearance, for deterministic ties
    for neighbor in neighbors:
        label = neighbor[0]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1
            first_seen.append(label)
    # sorted() is stable even with reverse=True, so tied labels keep first-seen order.
    ranked = sorted(first_seen, key=votes.get, reverse=True)
    return ranked[0]
def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose label (element 0) equals its prediction."""
    hits = sum(
        1 for idx in range(len(testSet)) if testSet[idx][0] == predictions[idx]
    )
    return (hits / float(len(testSet))) * 100.0
def knn(trainingSet, testSet, k, pattern, weight, r_band=None):
    """Classify every row of ``testSet`` by k-nearest-neighbour vote and return the accuracy.

    Neighbours come from ``getNeighbors``, the vote from ``getResponse`` and
    the score (a percentage) from ``getAccuracy``.
    """
    predictions = [
        getResponse(getNeighbors(trainingSet, instance, k, pattern, weight, r_band))
        for instance in testSet
    ]
    return getAccuracy(testSet, predictions)
def _zNormalizeRow(row):
    """Return ``row`` as an array with its label kept and its values z-normalised."""
    label = row[0]
    values = np.array(row[1:])
    centered = values - np.mean(values)
    spread = np.std(values)
    if spread == 0:
        # A constant series has zero spread; dividing would fill it with NaN,
        # so only the mean is removed.
        return np.append(label, centered)
    return np.append(label, centered / spread)


def prepareData(train_data, test_data):
    """Load the training and test files and z-normalise every series.

    Each row of the files is ``label v1 v2 ...``; the label (element 0) is kept
    as is and the remaining values are shifted to zero mean and scaled to unit
    standard deviation.

    Parameters:
        train_data: path of the training file.
        test_data:  path of the test file.

    Returns:
        ``(trainingSet, testSet)``, two lists of numpy arrays.
    """
    rawTrainingSet = []
    rawTestSet = []
    loadDataset(train_data, rawTrainingSet)
    loadDataset(test_data, rawTestSet)
    trainingSet = [_zNormalizeRow(row) for row in rawTrainingSet]
    testSet = [_zNormalizeRow(row) for row in rawTestSet]
    return trainingSet, testSet
In [3]:
# EDIT HERE: dataset to evaluate and the file that collects the accuracies.
# The relative paths are resolved against the notebook's working directory.
TRAIN_DATA = 'dataset/Beef_TRAIN'  # training split, passed to prepareData
TEST_DATA = 'dataset/Beef_TEST'  # test split, passed to prepareData
OUTPUT_FILE = 'acc_SHAPE_Beef.csv'  # results CSV; truncated and given a header in the next cell
In [4]:
# Load and z-normalise both splits, then start a fresh results file that
# contains only the CSV header (mode "w" discards earlier results).
trainingSet, testSet = prepareData(TRAIN_DATA, TEST_DATA)
with open(OUTPUT_FILE, "w") as myfile:
    myfile.writelines(["pattern_id,p,r_band_size,accuracy\n"])
In [5]:
# Step pattern #1 (recorded in the results CSV as p=0): one-cell moves along
# the column, the diagonal and the row. Entry n of WEIGHTS_SYM_1 weights the
# local distance for entry n of PATTERNS_1.
PATTERNS_1 = [
    (0, -1),
    (-1, -1),
    (-1, 0),
]
WEIGHTS_SYM_1 = [
    {(0, 0): 1},
    {(0, 0): 2},
    {(0, 0): 1},
]
In [6]:
# 1-NN accuracy for step pattern #1 with band_r=0.005.
# NOTE(review): the label and CSV row record the band as 0.01 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_1, WEIGHTS_SYM_1, 0.005)
print(" ".join(["Pattern#1 R-band=0.01 (1%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("".join(["1,0,0.01,", str(acc), "\n"]))
In [7]:
# 1-NN accuracy for step pattern #1 with band_r=0.015.
# NOTE(review): the label and CSV row record the band as 0.03 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_1, WEIGHTS_SYM_1, 0.015)
print(" ".join(["Pattern#1 R-band=0.03 (3%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("".join(["1,0,0.03,", str(acc), "\n"]))
In [8]:
# 1-NN accuracy for step pattern #1 with band_r=0.025.
# NOTE(review): the label and CSV row record the band as 0.05 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_1, WEIGHTS_SYM_1, 0.025)
print(" ".join(["Pattern#1 R-band=0.05 (5%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("".join(["1,0,0.05,", str(acc), "\n"]))
In [9]:
# Step pattern #2 (recorded in the results CSV as p=1/2). Each predecessor
# offset in PATTERNS_2 is paired with the weight dict at the same position in
# WEIGHTS_SYM_2; the dict keys are offsets of the cells whose local distance
# is charged for that step.
PATTERNS_2 = [
    (-1, -3),
    (-1, -2),
    (-1, -1),
    (-2, -1),
    (-3, -1),
]
WEIGHTS_SYM_2 = [
    {(0, -2): 2, (0, -1): 1, (0, 0): 1},
    {(0, -1): 2, (0, 0): 1},
    {(0, 0): 2},
    {(-1, 0): 2, (0, 0): 1},
    {(-2, 0): 2, (-1, 0): 1, (0, 0): 1},
]
In [10]:
# 1-NN accuracy for step pattern #2 with band_r=0.005.
# NOTE(review): the label and CSV row record the band as 0.01 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_2, WEIGHTS_SYM_2, 0.005)
print(" ".join(["Pattern#2 R-band=0.01 (1%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("".join(["2,1/2,0.01,", str(acc), "\n"]))
In [11]:
# 1-NN accuracy for step pattern #2 with band_r=0.015.
# NOTE(review): the label and CSV row record the band as 0.03 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_2, WEIGHTS_SYM_2, 0.015)
print(" ".join(["Pattern#2 R-band=0.03 (3%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("".join(["2,1/2,0.03,", str(acc), "\n"]))
In [12]:
# 1-NN accuracy for step pattern #2 with band_r=0.025.
# NOTE(review): the label and CSV row record the band as 0.05 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_2, WEIGHTS_SYM_2, 0.025)
print(" ".join(["Pattern#2 R-band=0.05 (5%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("".join(["2,1/2,0.05,", str(acc), "\n"]))
In [13]:
# Step pattern #3 (recorded in the results CSV as p=1). Each predecessor
# offset in PATTERNS_3 is paired with the weight dict at the same position in
# WEIGHTS_SYM_3.
PATTERNS_3 = [
    (-1, -2),
    (-1, -1),
    (-2, -1),
]
WEIGHTS_SYM_3 = [
    {(0, -1): 2, (0, 0): 1},
    {(0, 0): 2},
    {(-1, 0): 2, (0, 0): 1},
]
In [14]:
# 1-NN accuracy for step pattern #3 with band_r=0.005.
# NOTE(review): the label and CSV row record the band as 0.01 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_3, WEIGHTS_SYM_3, 0.005)
print(" ".join(["Pattern#3 R-band=0.01 (1%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("".join(["3,1,0.01,", str(acc), "\n"]))
In [15]:
# 1-NN accuracy for step pattern #3 with band_r=0.015.
# NOTE(review): the label and CSV row record the band as 0.03 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_3, WEIGHTS_SYM_3, 0.015)
print(" ".join(["Pattern#3 R-band=0.03 (3%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("".join(["3,1,0.03,", str(acc), "\n"]))
In [16]:
# 1-NN accuracy for step pattern #3 with band_r=0.025.
# NOTE(review): the label and CSV row record the band as 0.05 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_3, WEIGHTS_SYM_3, 0.025)
print(" ".join(["Pattern#3 R-band=0.05 (5%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("".join(["3,1,0.05,", str(acc), "\n"]))
In [21]:
# Step pattern #4 (recorded in the results CSV as p=2). Each predecessor
# offset in PATTERNS_4 is paired with the weight dict at the same position in
# WEIGHTS_SYM_4.
PATTERNS_4 = [
    (-2, -3),
    (-1, -1),
    (-3, -2),
]
WEIGHTS_SYM_4 = [
    {(-1, -2): 2, (0, -1): 2, (0, 0): 1},
    {(0, 0): 2},
    {(-2, -1): 2, (-1, 0): 2, (0, 0): 1},
]
In [22]:
# 1-NN accuracy for step pattern #4 with band_r=0.005.
# The run must use PATTERNS_4 / WEIGHTS_SYM_4: passing the pattern #3 constants
# here would store pattern #3's accuracy under the pattern #4 label.
# NOTE(review): the label and CSV row record the band as 0.01 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_4, WEIGHTS_SYM_4, 0.005)
print(" ".join(["Pattern#4 R-band=0.01 (1%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("4,2,0.01," + str(acc) + "\n")
In [23]:
# 1-NN accuracy for step pattern #4 with band_r=0.015.
# The run must use PATTERNS_4 / WEIGHTS_SYM_4: passing the pattern #3 constants
# here would store pattern #3's accuracy under the pattern #4 label.
# NOTE(review): the label and CSV row record the band as 0.03 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_4, WEIGHTS_SYM_4, 0.015)
print(" ".join(["Pattern#4 R-band=0.03 (3%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("4,2,0.03," + str(acc) + "\n")
In [24]:
# 1-NN accuracy for step pattern #4 with band_r=0.025.
# The run must use PATTERNS_4 / WEIGHTS_SYM_4: passing the pattern #3 constants
# here would store pattern #3's accuracy under the pattern #4 label.
# NOTE(review): the label and CSV row record the band as 0.05 - presumably the
# full band width (2 * band_r); confirm.
acc = knn(trainingSet, testSet, 1, PATTERNS_4, WEIGHTS_SYM_4, 0.025)
print(" ".join(["Pattern#4 R-band=0.05 (5%) acc >", str(acc)]))
with open(OUTPUT_FILE, "a") as myfile:
    myfile.write("4,2,0.05," + str(acc) + "\n")