In [ ]:
import os
import numpy as np
from PIL import Image
In [ ]:
# Run from the project root: when the current working directory path ends
# with 'ipynb', step up to its parent directory.
pwd = os.getcwd()
in_notebook_dir = pwd.endswith('ipynb')
if in_notebook_dir:
    os.chdir(os.pardir)
In [ ]:
# Build the list of paths for every entry in the data directory.
#
# os.listdir() returns names in arbitrary order, so the names are sorted
# first; otherwise the seeded shuffle applied to the loaded images would
# not give the same ordering from one machine or run to the next.
data_dir = 'data/lettericons'
data_list = [
    os.path.join(data_dir, f)
    for f in sorted(os.listdir(data_dir))
]
# Just show the first five paths
data_list[:5]
In [ ]:
from utils import preprocess_data
In [ ]:
# Load every file in data_list as a flat boolean pattern: an element is
# True where the image array value is greater than 100.
raw_data = []
for ii in data_list:
    im = Image.open(ii)
    idat = (np.array(im) > 100).flatten()
    raw_data.append(idat)

# Seed the generator, then shuffle the list of patterns in place.
np.random.seed(111)
np.random.shuffle(raw_data)

# Hand the shuffled patterns to the project's preprocessing helper.
data = preprocess_data(raw_data)
In [ ]:
# Show the value returned by preprocess_data(raw_data) as the cell output.
data
In [ ]:
# Examine one: open the first file listed in data_list and show the image
# object as the cell output.
im = Image.open(data_list[0])
im
In [ ]:
# Show the shape of the preprocessed data.
data.shape
In [ ]:
%matplotlib inline
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
In [ ]:
from utils import display_single_png, display_all_png
In [ ]:
# Display a single pattern. idat is whatever the image-loading loop assigned
# last, i.e. the flattened boolean pattern of the final file in data_list.
display_single_png(idat)
plt.show()
In [ ]:
# Pass the whole preprocessed data set to display_all_png, then show the
# current matplotlib figure(s).
display_all_png(data)
plt.show()
In [ ]:
from ART1 import ART1
In [ ]:
from collections import defaultdict

# Create the network.
input_row_size = 100
max_categories = 8
rho = 0.4
network = ART1(n=input_row_size, m=max_categories, rho=rho)

# Preprocess data.
data_cleaned = preprocess_data(data)

# Present the rows to the network in a seeded random order, but shuffle a
# copy rather than data_cleaned itself: a later cell zips data with
# data_cleaned row by row, and an in-place shuffle of data_cleaned would
# break that pairing whenever preprocess_data returns a new object.
np.random.seed(155)
train_order = np.random.permutation(len(data_cleaned))
network.compute(np.asarray(data_cleaned)[train_order])

# Single-argument print(...) calls produce the same output on Python 2
# and Python 3.
print("")
print("n rows of data:  {}".format(len(data_cleaned)))
print("max categories allowed:  {}".format(max_categories))
print("rho:  {}".format(rho))
print("")
In [ ]:
# Show the network's Y attribute as the cell output.
network.Y
In [ ]:
# Print learned clusters: one entry per row of the transposed Bij matrix.
# Single-argument print(...) works on both Python 2 and Python 3.
for idx, cluster in enumerate(network.Bij.T):
    print("Cluster Unit #{}".format(idx))
    # NOTE(review): display_single_output is not imported in any visible
    # cell (only display_single_png / display_all_png are) - confirm it is
    # defined before this cell runs.
    display_single_output(cluster)
In [ ]:
# Feed one learned cluster pattern back into the network and report which
# cluster index it is assigned to.
# Single-argument print(...) works on both Python 2 and Python 3.

# Cluster index to test
clust_idx = 2
print("Target:  {}".format(clust_idx))

# Row clust_idx of Bij.T, reduced to a 0/1 integer pattern (any non-zero
# entry becomes 1).
idata = network.Bij.T[clust_idx]
idata = idata.astype(bool).astype(int)
# NOTE(review): display_single_output is not imported in any visible cell -
# confirm it is defined before this cell runs.
display_single_output(idata)

# Prediction
pred = network.predict(idata)
print("prediction (cluster index):  {}".format(pred))
In [ ]:
# Group the rows of data by the cluster index predicted for the matching
# row of data_cleaned, then list each group.
# NOTE(review): the pairing assumes row i of data_cleaned corresponds to
# row i of data - confirm that still holds after any shuffling of
# data_cleaned.
output_dict = defaultdict(list)
for row, row_cleaned in zip(data, data_cleaned):
    pred = network.predict(row_cleaned)
    output_dict[pred].append(row)

# dict.items() and single-argument print(...) work on both Python 2 and
# Python 3 (iteritems() and the print statement are Python 2 only).
for k, v in output_dict.items():
    print("Cluster #{} ({} members)".format(k, len(v)))
    print('-' * 20)
    for row in v:
        # NOTE(review): display_single_output is not imported in any
        # visible cell - confirm it is defined before this cell runs.
        display_single_output(row)
In [ ]:
# Noise test: take a learned cluster pattern, flip some of its bits at
# random, and report which cluster the network assigns the noisy pattern to.

# Number of tests
ntests = 10
# Number of random bit positions to flip per test. Positions are drawn with
# replacement, so the same position can be picked (and flipped back) more
# than once.
nchanges = 30

for test in range(ntests):
    # Pick a random cluster index and take that row of Bij.T as a 0/1
    # integer pattern (astype returns a copy, so Bij itself is not modified).
    clust_idx = np.random.randint(network.output_size)
    print("Target:  {}".format(clust_idx))
    idata = network.Bij.T[clust_idx]
    idata = idata.astype(bool).astype(int)

    # Modify the data by flipping randomly chosen bits.
    # The previous version followed the flip with
    # `idata[rand_element] = np.random.randint(1)`; randint(1) draws from
    # [0, 1) and therefore always returns 0, so every selected bit was
    # simply cleared and the flip had no effect. That overwrite is removed.
    for ii in range(nchanges):
        rand_element = np.random.randint(idata.shape[0])
        idata[rand_element] = 1 - idata[rand_element]
    # NOTE(review): display_single_output is not imported in any visible
    # cell - confirm it is defined before this cell runs.
    display_single_output(idata)

    # Prediction for the noisy pattern, followed by the pattern stored for
    # the predicted cluster.
    pred = network.predict(idata)
    print("prediction (cluster index):  {}".format(pred))
    display_single_output(network.Bij.T[pred])
    print("-" * 20)

plt.show()
In [ ]:
# Print training data.
# NOTE(review): display_output is not imported in any visible cell (only
# display_single_png / display_all_png are) - confirm it is defined before
# this cell runs.
display_output(data)
plt.show()
In [ ]: