In [1]:
%load_ext autoreload
In [2]:
%autoreload 2
In [6]:
import os
import numpy as np
from IPython.display import Image
In [7]:
# Relative paths used further down (e.g. 'data/...') are resolved against the
# working directory. If the kernel was started in a directory whose path ends
# in 'ipynb', step up one level first.
pwd = os.getcwd()
if pwd.endswith('ipynb'):
    os.chdir('..')
In [10]:
# Display the architecture diagram inline, 500 px wide. The path is relative,
# so the working directory must be the one that contains `data/`.
Image("data/architecture_art2.png", width=500)
Out[10]:
In [11]:
from ART2 import ART2
# This is my data!
# A single input pattern with two components.
idata = np.array([0.8, 0.6])
# n is set to the number of components in the pattern.
# NOTE(review): m is presumably the number of category units, rho the
# vigilance and theta a noise threshold -- confirm against the ART2 class.
nn = ART2(n=len(idata), m=2, rho=0.9, theta=0.1)
# Logging is requested for the console only (to_file=False) and is switched
# off again once the pattern has been presented.
nn.start_logging(to_file=False, to_console=True)
nn.learning_trial(idata = idata)
nn.stop_logging()
In [12]:
# Show the two weight attributes of the network after the first trial
# (Bij transposed, then Tji), separated by a blank line.
# The single-argument parenthesised form prints the same text as the
# Python 2 print statement.
print(nn.Bij.T)
print("")
print(nn.Tji)
In [13]:
# Present a second pattern to the SAME network object created earlier, so
# whatever state the first trial left in `nn` is carried into this one.
# start_logging() is called without arguments here (the first trial passed
# to_file/to_console explicitly).
nn.start_logging()
# second pattern
# Same two values as the first pattern, in swapped order.
idata = np.array([0.6, 0.8])
nn.learning_trial(idata = idata)
nn.stop_logging()
In [14]:
# Folder holding the input files (relative to the working directory)
data_dir = 'data'
# list what is available in that folder
print(os.listdir(data_dir))
# name of the ASCII pattern file that is loaded next
data_file = 'ASCII_01.txt'
In [16]:
# Read the whole pattern file into memory.
with open(os.path.join(data_dir, data_file), 'r') as f:
    raw_data = f.read()

# Get data into a usable form here:
#   1. patterns are separated by blank lines -> split on '\n\n' and trim
#   2. drop the empty chunks that extra blank lines leave behind
#      (compare by value: `is not ''` tests object identity and only works
#      when the interpreter happens to reuse one empty-string object)
#   3. join the lines of each pattern into one flat string
data = [d.strip() for d in raw_data.split('\n\n')]
data = [d for d in data if d != '']
data = [d.replace('\n', '') for d in data]

# print the data
data
Out[16]:
In [26]:
def format_output(raw, width=5, height=5):
    """
    Lay a flat pattern out as a grid, one grid row per line
    ----
    raw: str (or other sliceable sequence)
        Flat pattern, read row after row
    width: int
        Number of items per line (default 5)
    height: int
        Number of lines produced (default 5)

    Returns a single string of `height` lines joined by newlines.
    Items beyond width*height are ignored; when raw is shorter than
    that, the trailing lines are short or empty.
    With the defaults this is the original fixed 5x5 layout.
    """
    rows = [raw[i * width:(i + 1) * width] for i in range(height)]
    # "{}".format keeps the rendering of non-string slices unchanged
    return "\n".join("{}".format(row) for row in rows)
from collections import Counter
import numpy as np
def preprocess_data(data):
    """
    Convert to numpy array
    Convert to 1s and 0s
    ----
    data: sequence of equal-length rows (e.g. strings)
        Each row is compared item by item against one reference symbol

    Returns a 2-D numpy int array with one row per input row: 1 where
    the item equals the reference symbol, 0 everywhere else.
    An empty `data` gives an empty (0, 0) array.

    The reference symbol is the first value returned by
    get_unique_chars(row, reverse=True) for the first row that contains
    two distinct symbols.

    Raises
    ----
    AssertionError: a row's length differs from the first row's
    ValueError: no row contains two distinct symbols
    Errors raised by get_unique_chars for the rows it inspects
    propagate unchanged.
    """
    if len(data) == 0:
        return np.zeros((0, 0), dtype=int)

    # get size
    idat_size = len(data[0])

    # get unique characters
    # get_unique_chars answers (False, False) for a row with fewer than two
    # symbols. That tuple is truthy, so it has to be inspected explicitly,
    # and the search has to move on to the next row.
    char1 = False
    for irow in data:
        char1, _ = get_unique_chars(irow, reverse=True)
        if char1 is not False:
            break
    else:
        raise ValueError("no data row contains two distinct characters")

    # convert to 1/0 using the char identified
    outdata = []
    for irow in data:
        assert len(irow) == idat_size, "data row lengths not consistent"
        outdata.append([int(x == char1) for x in irow])

    return np.array(outdata).astype(int)
def get_unique_chars(irow, reverse=False):
    """
    Find the two distinct symbols used in a row
    Helper function
    ----
    irow: iterable of hashable items (e.g. a string)
    reverse: bool
        Swap the two symbols before returning them

    Returns a 2-tuple of symbols, in the order Counter yields its keys
    (swapped when reverse is set), or (False, False) when the row holds
    fewer than two distinct symbols.
    Raises Exception when the row holds more than two distinct symbols.
    """
    tally = Counter(irow)
    n_distinct = len(tally)

    if n_distinct > 2:
        raise Exception("Data is not binary")
    if n_distinct < 2:
        # row doesn't contain both chars
        return False, False

    first, second = tally.keys()
    return (second, first) if reverse else (first, second)
In [27]:
# Look at the first pattern laid out as a grid
print(format_output(data[0]))
In [24]:
from ART2 import ART2
In [28]:
from collections import defaultdict
# create network
input_row_size = 25
max_categories = 10
rho = 0.20
network = ART2(n=input_row_size, m=max_categories, rho=rho)

# preprocess data; rows of data_cleaned stay in the same order as `data`
data_cleaned = preprocess_data(data)

# Present the rows in a seeded random order.
# np.random.shuffle works in place, so a COPY is shuffled: shuffling
# data_cleaned itself would break the row-for-row pairing with `data` that
# the report below relies on (each raw pattern would be filed under the
# category predicted for some other row).
np.random.seed(1221)
training_rows = data_cleaned.copy()
np.random.shuffle(training_rows)

# learn data array, row by row
for row in training_rows:
    network.learn(row)

# Summary. Each line is "<label> <value>"; the single-argument print form
# gives the same text under Python 2 and Python 3.
print("")
print("{} {}".format("n rows of data: ", len(data_cleaned)))
print("{} {}".format("max categories allowed: ", max_categories))
print("{} {}".format("rho: ", rho))
print("{} {}".format("n categories used: ", network.n_cats))
print("")

# output results, row by row: group the raw patterns by predicted category
output_dict = defaultdict(list)
for row, row_cleaned in zip(data, data_cleaned):
    pred = network.predict(row_cleaned)
    output_dict[pred].append(row)

for k, v in output_dict.items():
    print("category: {}, ({} members)".format(k, len(v)))
    print('-' * 20)
    for row in v:
        print(format_output(row))
        print("")
    print("")