Social Bot

Goal: correctly identify whether a response is socially acceptable


In [542]:
import tensorflow as tf
import numpy
from numpy import ndarray

Define constants


In [543]:
sample_count = {
    "training": 8000,
    "test": 1300,
    "validation": 10000
    }

category_min = 1
category_max = 160

label_type_count = category_max - category_min + 1

image_height = 1
image_width = 2

flat_size = image_height * image_width

label_type_count = 2 # number of distinct labels (social or not)

learning_rate = 0.5

Auxiliary functions

Show result


In [544]:
def show_result(x):
    """Print the result"""
    print("{:.0f}%".format(x * 100))

In [545]:
numpy.exp([-1, -2, -3])


Out[545]:
array([ 0.36787944,  0.13533528,  0.04978707])

Automaticaly generate data sets


In [546]:
def social_rule(xs: ndarray, threshold=0.2):
    """
    The rule for juding whether a response is socially polite
    Args:
        xs (ndarray): inputs and responses (2D array)
        threshold (float): threshold above which it is considerred socially acceptible
    """
    def one_hot(x):
        if x:
            return [1, 0]
        else:
            return [0, 1]
    return one_hot(numpy.exp(-numpy.absolute(numpy.diff(xs))) > threshold)

In [547]:
def social_samples(n: int, low: int, high: int):
    """
    Args:
        n (int): number of data points
        low (int): lower bound
        high (int): upper bound
    """
    def aux(_):
        data = numpy.random.randint(low, high=high, size=2)
        return numpy.concatenate((data, social_rule(data)))
    data = numpy.vstack(map(aux, range(n)))
    print(data)
    return {
        "data": data[:, 0:2],
        "labels": data[:, 2:]
    }

In [548]:
social_samples(10, 1, 5)


[[3 1 0 1]
 [1 2 1 0]
 [3 4 1 0]
 [2 2 1 0]
 [2 1 1 0]
 [1 3 0 1]
 [3 2 1 0]
 [4 3 1 0]
 [3 4 1 0]
 [1 4 0 1]]
Out[548]:
{'data': array([[3, 1],
        [1, 2],
        [3, 4],
        [2, 2],
        [2, 1],
        [1, 3],
        [3, 2],
        [4, 3],
        [3, 4],
        [1, 4]]), 'labels': array([[0, 1],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [0, 1],
        [1, 0],
        [1, 0],
        [1, 0],
        [0, 1]])}

Reflection-type testing data


In [549]:
low, high = category_min, category_max
dataset = {
    "training": social_samples(sample_count['training'], low, high),
    "test": social_samples(sample_count['test'], low, high),
    "validation": social_samples(sample_count['validation'], low, high)
}


[[ 61  58   0   1]
 [ 41 154   0   1]
 [118  83   0   1]
 ..., 
 [ 46  94   0   1]
 [ 59  92   0   1]
 [118 129   0   1]]
[[129 125   0   1]
 [157  39   0   1]
 [ 87  20   0   1]
 ..., 
 [154  30   0   1]
 [ 12 123   0   1]
 [ 43 127   0   1]]
[[ 20  31   0   1]
 [ 87  32   0   1]
 [113 145   0   1]
 ..., 
 [  5 139   0   1]
 [ 50  27   0   1]
 [159  45   0   1]]

In [550]:
dataset['training']['data'].shape


Out[550]:
(8000, 2)

In [551]:
dataset['test']['labels'][1]


Out[551]:
array([0, 1])

Memory allocation


In [552]:
x = tf.placeholder(tf.float32, [None, flat_size])

In [553]:
W = tf.Variable(tf.zeros([flat_size, label_type_count]))

In [554]:
b = tf.Variable(tf.zeros([label_type_count]))

In [555]:
y = tf.nn.softmax(tf.matmul(x, W) + b)

In [556]:
y_ = tf.placeholder(tf.float32, [None, label_type_count])

In [557]:
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
)

In [558]:
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

In [559]:
# create a session
session = tf.InteractiveSession()

In [560]:
tf.global_variables_initializer().run()

I. Regular Neural Net

Training


In [561]:
session.run(train_step, feed_dict={x: dataset['training']['data'], y_: dataset['training']['labels']})

In [562]:
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))

In [563]:
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Test


In [564]:
test_result = session.run(accuracy, feed_dict={x: dataset['test']['data'], y_: dataset['test']['labels']})

In [565]:
show_result(test_result)


98%

Validation


In [566]:
exam_result = session.run(accuracy, feed_dict={x: dataset['validation']['data'], y_: dataset['validation']['labels']})

In [567]:
show_result(exam_result)


98%

In [ ]:


In [ ]: