1D Mirror Detection Learner

Goal: correctly identify whether an abstract mirror object is within the vision range


In [1]:
import tensorflow as tf
import numpy

Define constants


In [2]:
# Geometry of one observed patch: a 3x3 image flattened into a vector
# of length 9 (the "vision range").
image_width = 3
image_height = 3
vision_range = image_width * image_height
# Non-mirror object values are drawn from [category_min, category_max];
# the value 0 is reserved for the mirror itself (see random_mirror /
# full_mirror below).
category_min = 8
category_max = 9
category_count = category_max - category_min + 1
label_type_count = 2  # mirror or not

# Number of independent segments per data split.
segment_count = {
    "training": 8000,
    "test": 1300,
    "validation": 2000
    }
# Total samples per split: vision_range samples per segment.
sample_count = {
    "training": segment_count['training'] * vision_range,
    "test": segment_count['test'] * vision_range,
    "validation": segment_count['validation'] * vision_range
    }
learning_rate = 0.5  # step size for gradient descent

Auxiliary functions

Show result


In [3]:
def show_result(x):
    """Print a fraction as a whole-number percentage (e.g. 0.56 -> "56%")."""
    percent = format(x * 100, ".0f")
    print(percent + "%")

Automatically generate data sets


In [4]:
def mirror_label(isTrue):
    """One-hot label: [1, 0] when a mirror is present, [0, 1] otherwise."""
    return [1, 0] if isTrue else [0, 1]


def random_mirror(vision_range, category_count):
    """Build one random observation that may contain a mirror (category 0).

    All but one slot are drawn from the non-mirror categories
    1 .. category_count-1; the remaining slot is drawn from the full range
    0 .. category_count-1, so a mirror appears with probability
    1/category_count. The slots are then shuffled.

    Args:
        vision_range (int): how many objects are seen at a time
        category_count (int): total number of object categories,
            including the mirror category
    Returns:
        2-tuple:
            [0]: 1D numpy array of length vision_range
            [1]: one-hot label [1, 0] (has mirror) or [0, 1] (no mirror)
    """
    non_mirror = numpy.random.randint(1, high=category_count,
                                      size=vision_range - 1)
    wildcard = numpy.random.randint(0, category_count, 1)
    observation = numpy.random.permutation(
        numpy.concatenate((non_mirror, wildcard)))
    return (observation, mirror_label(0 in observation))

In [5]:
def mirror_data(n, vision_range, category_count):
    """Generate n independent labelled observations.

    Args:
        n (int): number of data points
        vision_range (int): number of objects seen at a time
        category_count (int): total number of object categories,
            including the mirror category (0)
    Returns:
        A dictionary:
            data: numpy array of shape (n, vision_range)
            labels: numpy array of shape (n, 2) — one-hot rows,
                [1, 0] = mirror present, [0, 1] = absent
    """
    observations = []
    labels = []
    for _ in range(n):
        observation, label = random_mirror(vision_range, category_count)
        observations.append(observation)
        labels.append(label)
    return {
        "data": numpy.array(observations),
        "labels": numpy.array(labels)
    }

In [6]:
def mirror_data_with_overlap(n, vision_range, category_count):
    """Generate overlapping sliding-window samples from n merged segments.

    n independent segments of length vision_range are generated and
    concatenated; every window of length vision_range over the merged
    sequence becomes one sample, so consecutive samples overlap by
    vision_range - 1 positions.

    Args:
        n (int): number of independent (non-overlapping) segments
        vision_range (int): number of objects seen at a time
        category_count (int): total number of object categories,
            including the mirror category (0)
    Returns:
        A dictionary:
            data: numpy array of shape
                (n*vision_range - vision_range + 1, vision_range)
            labels: numpy array of matching one-hot rows,
                [1, 0] = mirror present, [0, 1] = absent
    """
    raw = [random_mirror(vision_range, category_count) for _ in range(n)]
    merged = numpy.concatenate([x[0] for x in raw])
    # +1 so the final full window (ending on the last element) is included;
    # range(len(merged) - vision_range) would silently drop it.
    data_set = [merged[i:i + vision_range]
                for i in range(len(merged) - vision_range + 1)]
    labels = [mirror_label(0 in window) for window in data_set]
    return {
        "data": numpy.array(data_set),
        # numpy array for consistency with mirror_data's return shape.
        "labels": numpy.array(labels)
    }

Reflection-type testing data


In [7]:
def one_row(n, low, high):
    """Draw one row of random integers in the inclusive range [low, high].

    Args:
        n (int): length of the row
        low (int): lowest possible value (inclusive)
        high (int): highest possible value (inclusive)
    Returns:
        1D numpy array of length n
    """
    # numpy's randint upper bound is exclusive, hence high + 1.
    return numpy.random.randint(low, high + 1, n)

In [8]:
def half_mirror(m, n, low, high):
    """Stack m independent random rows into an (m, n) matrix.

    Args:
        m (int): number of rows
        n (int): number of columns
        low (int): lowest possible value (inclusive)
        high (int): highest possible value (inclusive)
    Returns:
        2D numpy array of shape (m, n)
    """
    rows = [one_row(n, low, high) for _ in range(m)]
    return numpy.vstack(rows)

In [9]:
def full_mirror(height, width, low, high):
    """Generate an image that is vertically symmetric with probability ~1/2.

    The image is a top half, an all-zero separator row, and a bottom
    half. With probability ~0.5 the bottom half is an independent random
    block; otherwise it is the reflection of the top half.
    NOTE(review): assumes height is odd — for even height the result has
    height - 1 rows. Confirm callers always pass odd heights.

    Args:
        height (int): image height
        width (int): image width
        low (int): lowest possible value (inclusive)
        high (int): highest possible value (inclusive)
    Returns:
        2D numpy array
    """
    half_rows = (height - 1) // 2
    coin = numpy.random.random()
    top = half_mirror(half_rows, width, low, high)
    separator = numpy.zeros(width)
    if coin > 0.5:
        # Asymmetric case: bottom is an independent random block.
        bottom = half_mirror(half_rows, width, low, high)
    else:
        # Symmetric case: bottom mirrors the top.
        bottom = top[::-1, :]
    return numpy.vstack([top, separator, bottom])

In [10]:
def label_full_mirror(a):
    """Flatten an image and label it by vertical symmetry.

    The rows above the middle row are compared with the reflected rows
    below it; the middle row itself is ignored. A small tolerance guards
    against floating-point noise.

    Args:
        a: 2D numpy array
    Returns:
        2-tuple:
            [0]: the image flattened to a 1D array of length rows*cols
            [1]: one-hot label [1, 0] (symmetric) or [0, 1] (not)
    """
    rows, cols = numpy.shape(a)
    upper = a[:(rows - 1) // 2, :]
    lower = a[(rows + 1) // 2:, :]
    is_mirror = bool(numpy.amax(numpy.absolute(upper - lower[::-1])) < 1.0e-5)
    return (a.reshape((rows * cols,)), mirror_label(is_mirror))

In [11]:
def reflection_samples(N, height, width, low, high):
    """Generate N flattened reflection images with symmetry labels.

    Args:
        N (int): total number of samples
        height (int): height of each image
        width (int): width of each image
        low (int): lowest possible value
        high (int): highest possible value
    Returns:
        A dictionary:
            data: numpy array of shape (N, height*width)
            labels: numpy array of shape (N, 2) — one-hot symmetry labels
    """
    flattened = []
    labels = []
    for _ in range(N):
        image, label = label_full_mirror(full_mirror(height, width, low, high))
        flattened.append(image)
        labels.append(label)
    return {
        "data": numpy.array(flattened),
        "labels": numpy.array(labels),
    }

Dataset


In [12]:
# dataset = {
#     "training": mirror_data(sample_count['training'], vision_range, category_count),
#     "test": mirror_data(sample_count['test'], vision_range, category_count),
#     "validation": mirror_data(sample_count['validation'], vision_range, category_count)
# }

In [13]:
# dataset = {
#     "training": mirror_data_with_overlap(segment_count['training'], vision_range, category_count),
#     "test": mirror_data_with_overlap(segment_count['test'], vision_range, category_count),
#     "validation": mirror_data_with_overlap(segment_count['validation'], vision_range, category_count)
# }

In [14]:
low, high = category_min, category_max
# One reflection data set per split, all drawn with the same geometry.
dataset = {
    split: reflection_samples(sample_count[split], image_height, image_width, low, high)
    for split in ("training", "test", "validation")
}

In [20]:
dataset['test']['data'][1]


Out[20]:
array([ 8.,  9.,  8.,  0.,  0.,  0.,  8.,  9.,  8.])

In [17]:
dataset['test']['labels'][1]


Out[17]:
array([1, 0])

Model definition (placeholders, variables, loss)


In [ ]:


In [374]:
x = tf.placeholder(tf.float32, [None, vision_range])

In [375]:
W = tf.Variable(tf.zeros([vision_range, label_type_count]))

In [376]:
b = tf.Variable(tf.zeros([label_type_count]))

In [377]:
y = tf.nn.softmax(tf.matmul(x, W) + b)

In [378]:
y_ = tf.placeholder(tf.float32, [None, label_type_count])

In [379]:
# Cross-entropy loss. softmax_cross_entropy_with_logits applies softmax
# internally, so it must receive the raw logits tf.matmul(x, W) + b —
# the original passed the already-softmax-ed y, applying softmax twice,
# which flattens the gradients and keeps accuracy near chance level.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                            logits=tf.matmul(x, W) + b)
)

In [380]:
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

In [381]:
# Create a session; InteractiveSession installs itself as the default
# session so .run()/.eval() work without an explicit context manager.
session = tf.InteractiveSession()

In [382]:
tf.global_variables_initializer().run()

I. Regular Neural Net

Training


In [383]:
session.run(train_step, feed_dict={x: dataset['training']['data'], y_: dataset['training']['labels']})

In [384]:
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))

In [385]:
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Test


In [386]:
test_result = session.run(accuracy, feed_dict={x: dataset['test']['data'], y_: dataset['test']['labels']})

In [387]:
show_result(test_result)


56%

Validation


In [388]:
exam_result = session.run(accuracy, feed_dict={x: dataset['validation']['data'], y_: dataset['validation']['labels']})

In [389]:
show_result(exam_result)


57%

In [ ]: