In [2]:
%matplotlib inline
import csv
import numpy as np
# Column headers holding the principal-component scores: PC1 through PC14.
pcafields = ['PC' + str(number) for number in range(1, 15)]

# Each CSV record becomes one row of 14 floats (presumably one record per
# quarter, going by the file name -- confirm against the data).
with open('nb/quarterlypca.csv', encoding = 'utf-8') as f:
    reader = csv.DictReader(f)
    pca = [[float(record[name]) for name in pcafields] for record in reader]

# Downstream cells expect a 2-D array: rows are observations, columns are components.
pca = np.array(pca)
In [3]:
from scipy import spatial
def distance_matrix(pca):
    """Return the square matrix of pairwise cosine distances between rows.

    Parameters
    ----------
    pca : array-like, shape (observations, dimensions)
        One row per observation.

    Returns
    -------
    numpy.ndarray, shape (observations, observations)
        Entry ``[i, j]`` is the cosine distance between row ``i`` and row ``j``.

    Raises
    ------
    ValueError
        If ``pca`` is not two-dimensional.
    """
    pca = np.asarray(pca, dtype=float)
    if pca.ndim != 2:
        raise ValueError(
            'distance_matrix expects a 2-D array, got %d dimension(s)' % pca.ndim
        )

    observations = pca.shape[0]
    if observations == 0:
        # Nothing to compare; keep the (0, 0) result the explicit loops produced.
        return np.zeros((0, 0))

    # cdist evaluates every pair in compiled code. The permutation cells call
    # this function thousands of times, so a Python-level double loop with one
    # cosine() call per pair dominates the notebook's running time.
    return spatial.distance.cdist(pca, pca, metric='cosine')
# Pairwise cosine distances between all rows of `pca`; later cells plot this
# matrix and slide the Foote kernels over it.
d = distance_matrix(pca)
In [4]:
import matplotlib.pyplot as plt
# Heat map of the observed distance matrix. origin='lower' draws row 0 at the
# bottom, so both axes increase away from the lower-left corner.
plt.matshow(d, origin = 'lower', cmap = plt.cm.YlOrRd)
plt.show()
In [5]:
def make_foote(quart):
    """Build a square checkerboard kernel with ``quart`` cells per quadrant.

    The result has ``2 * quart`` rows and columns. The upper-left and
    lower-right quadrants are filled with -1, and the upper-right and
    lower-left quadrants with +1.

    Parameters
    ----------
    quart : int
        Side length of one quadrant (the kernel's half-width).

    Returns
    -------
    numpy.ndarray
        The ``(2 * quart, 2 * quart)`` kernel of -1 / +1 integers.
    """
    negatives = [-1] * quart
    positives = [1] * quart

    upper_row = negatives + positives
    lower_row = positives + negatives

    # First `quart` rows use the upper pattern, the remaining `quart` the lower one.
    kernel_rows = [upper_row] * quart + [lower_row] * quart
    return np.array(kernel_rows)
foote5 = make_foote(20)
# This gives us a Foote matrix with a five-year half-width: 20 quarters on
# each side of the candidate boundary, i.e. a 40 x 40 kernel.
def foote_novelty(distmat, foote):
    """Slide a Foote kernel along the diagonal of a distance matrix.

    For each position ``i`` the kernel is multiplied element-wise with the
    square window ``distmat[i - h : i + h, i - h : i + h]`` (``h`` is half the
    kernel width) and the products are summed.

    Parameters
    ----------
    distmat : numpy.ndarray, shape (n, n)
        Square matrix of pairwise distances.
    foote : numpy.ndarray, shape (2h, 2h)
        Square kernel with an even side length, e.g. from ``make_foote(h)``.

    Returns
    -------
    list
        One novelty score per row of ``distmat``. Positions whose window would
        not fit inside the matrix get a score of 0.

    Raises
    ------
    AssertionError
        If either matrix is not square, or the kernel side length is odd.
    """
    axis1, axis2 = distmat.shape
    assert axis1 == axis2
    distsize = axis1

    axis1, axis2 = foote.shape
    assert axis1 == axis2
    assert axis1 % 2 == 0, 'Foote kernel must have an even side length'
    # Floor division keeps the half-width an int. With true division it becomes
    # a float in Python 3, and float slice bounds raise TypeError.
    halfwidth = axis1 // 2

    novelties = []
    for i in range(distsize):
        start = i - halfwidth
        end = i + halfwidth

        # Edge handling kept exactly as before: the position whose window
        # would end at the final row (end == distsize) is also scored 0.
        if start < 0 or end > (distsize - 1):
            novelties.append(0)
        else:
            novelties.append(np.sum(foote * distmat[start: end, start: end]))

    return novelties
# Novelty curve for the observed data under the five-year kernel, then its
# peak value -- the statistic the permutation cells compare against.
novelties = foote_novelty(d, foote5)
plt.plot(novelties)
plt.show()
print(np.max(novelties))
In [6]:
# Null distribution for the five-year kernel: put the rows of `pca` in random
# order, rebuild the distance matrix, and record the largest novelty score.
# A dedicated seeded generator gives the same numbers on every
# Restart & Run All without touching NumPy's global random state.
permutation_rng = np.random.RandomState(0)

permuted_peaks = []
for i in range(100):
    # permutation() returns a copy shuffled along the first axis (rows only),
    # so `pca` itself is never modified.
    randomized = permutation_rng.permutation(pca)
    randdist = distance_matrix(randomized)
    novelties = foote_novelty(randdist, foote5)
    permuted_peaks.append(np.max(novelties))
print(permuted_peaks)
In [7]:
# Number of permutations collected so far.
print(len(permuted_peaks))
# Sort in place, largest first, so the head of the list is the upper tail of
# the null distribution.
permuted_peaks.sort(reverse = True)
print(permuted_peaks[0:5])
# Histogram of the peak novelty values across all permutations.
plt.hist(permuted_peaks)
plt.show()
In [8]:
# Repeat the observed-data analysis with a narrower kernel: 16 quarters on
# each side of the candidate boundary.
foote4 = make_foote(16)
novelties = foote_novelty(d, foote4)
plt.plot(novelties)
plt.show()
print("Four-year half-width:" + str(np.max(novelties)))
In [9]:
# Null distribution for the four-year kernel. The cell draws its permutations
# straight from `pca` with its own seeded generator, so it is reproducible and
# does not depend on whatever state an earlier cell left in `randomized`.
permutation_rng = np.random.RandomState(0)

permuted_peaks = []
for i in range(100):
    # Shuffled copy of the rows; `pca` is left untouched.
    randomized = permutation_rng.permutation(pca)
    randdist = distance_matrix(randomized)
    novelties = foote_novelty(randdist, foote4)
    permuted_peaks.append(np.max(novelties))

print(len(permuted_peaks))
# Largest first, so the first five entries are the five highest permuted peaks.
permuted_peaks.sort(reverse = True)
print(permuted_peaks[0:5])
In [12]:
# Sanity check on the kernel size: make_foote(16) should yield a 32 x 32 array.
foote4.shape
Out[12]:
In [13]:
# For every kernel half-width from 12 to 39 quarters, run 100 row permutations
# and save the ten largest peak novelty values. Each CSV row is the half-width
# followed by those ten values, largest first.
#
# A dedicated seeded generator keeps the output file reproducible, and the
# permutations are drawn from `pca` directly rather than from a leftover array.
permutation_rng = np.random.RandomState(0)

# newline='' is what the csv module expects of files it writes to; without it
# every row is followed by a blank line on Windows. The explicit encoding
# matches the other CSV cells in this notebook.
with open('toptenfootes.csv', mode = 'w', encoding = 'utf-8', newline = '') as f:
    writer = csv.writer(f)
    for quarters in range(12, 40):
        thisfoote = make_foote(quarters)
        these_maxima = []
        for i in range(100):
            # Shuffled copy of the rows; `pca` is left untouched.
            randomized = permutation_rng.permutation(pca)
            rd = distance_matrix(randomized)
            novelties = foote_novelty(rd, thisfoote)
            these_maxima.append(np.max(novelties))
        these_maxima.sort(reverse = True)
        outrow = [quarters]
        outrow.extend(these_maxima[0:10])
        writer.writerow(outrow)
        print(outrow)
In [ ]:
# Novelty curves of the observed data for every kernel half-width from 12 to
# 39 quarters. Each CSV row is the half-width followed by one novelty score
# per observation.

# The distance matrix does not depend on the kernel, so compute it once
# instead of once per half-width.
d = distance_matrix(pca)

# newline='' is what the csv module expects of files it writes to; without it
# every row is followed by a blank line on Windows.
with open('realfootes.csv', mode='w', encoding = 'utf-8', newline = '') as f:
    writer = csv.writer(f)
    for quarters in range(12, 40):
        thisfoote = make_foote(quarters)
        novelties = foote_novelty(d, thisfoote)
        outrow = [quarters]
        outrow.extend(novelties)
        writer.writerow(outrow)
In [15]:
# Heat map of the distance matrix from the most recent permutation
# (`randdist` is whatever the last permutation cell that ran left behind),
# for visual comparison with the observed matrix plotted earlier.
plt.matshow(randdist, origin = 'lower', cmap = plt.cm.YlOrRd)
plt.show()
In [ ]: