Memory Representation in Dialogue Systems (Part 3)

Under construction, will update with explanations when finished.

Import


In [1]:
import pandas as pd
import numpy as np
import nltk
from sklearn.metrics import accuracy_score
from neo4j.v1 import GraphDatabase, basic_auth
from collections import defaultdict

In [2]:
refs_utts = pd.read_pickle('resources/utts_refs.pkl')
props = pd.read_pickle('resources/restaurants_props.pkl')
len(refs_utts), len(props)


Out[2]:
(414, 52256)

In [3]:
refs_utts[:5]


Out[3]:
text bot o ind mask gid target
0 [i, want, a, moderately, priced, restaurant, i... api_call R_cuisine west moderate trn 2 True 2 prezzo
2 [cheap, restaurant, in, the, north, part, of, ... api_call R_cuisine north cheap trn 2 True 11 da_vinci_pizzeria
3 [cheap, restaurant, in, the, south, part, of, ... api_call R_cuisine south cheap trn 2 True 12 the_lucky_star
4 [cheap, restaurant, serving, indian, food] api_call indian R_location cheap trn 2 True 15 the_gandhi
5 [thai, food] api_call thai R_location R_price trn 2 True 22 bangkok_city

In [4]:
props[:5]


Out[4]:
rname attr_key attr_value
3 saint_johns_chop_house R_cuisine british
4 saint_johns_chop_house R_location west
7 saint_johns_chop_house R_price moderate
10 prezzo R_cuisine italian
11 prezzo R_location west

Process Text


In [5]:
stemmer = nltk.stem.snowball.EnglishStemmer()

def stem(sentence):
    return [stemmer.stem(w) for w in sentence]

In [6]:
test = pd.DataFrame()
test['text'] = [stem(s) for s in refs_utts.text]
test['frame'] = [tuple(stem(f.split()[1:])) for f in refs_utts.bot]
len(test)


Out[6]:
414

In [7]:
# Remove poorly formatted frames
test = test[test.frame.map(len) == 3]
len(test)


Out[7]:
405

In [8]:
test[:5]


Out[8]:
text frame
0 [i, want, a, moder, price, restaur, in, the, w... (r_cuisin, west, moder)
1 [cheap, restaur, in, the, north, part, of, town] (r_cuisin, north, cheap)
2 [cheap, restaur, in, the, south, part, of, town] (r_cuisin, south, cheap)
3 [cheap, restaur, serv, indian, food] (indian, r_locat, cheap)
4 [thai, food] (thai, r_locat, r_price)

In [9]:
knowledge = pd.DataFrame()
knowledge['restaurant'] = props.rname.copy()
knowledge['key'] = [stemmer.stem(s) for s in props.attr_key]
knowledge['value'] = [stemmer.stem(s) for s in props.attr_value]

In [11]:
knowledge[:5]


Out[11]:
restaurant key value
3 saint_johns_chop_house r_cuisin british
4 saint_johns_chop_house r_locat west
7 saint_johns_chop_house r_price moder
10 prezzo r_cuisin italian
11 prezzo r_locat west

In [11]:
# A dictionary of keys to the list of values they can take
# In this instance, keys form mutually exclusive lists of values
types = knowledge[['key', 'value']] \
    .groupby('key') \
    .aggregate(lambda x: tuple(set(x))) \
    .reset_index() \
    .set_index('key') \
    .value \
    .to_dict()

In [12]:
types['r_cuisin'][:5]


Out[12]:
('asian_orient', 'vietnames', 'lebanes', 'african', 'thai')

In [13]:
types['r_locat']


Out[13]:
('centr', 'south', 'west', 'east', 'north')

In [14]:
types['r_price']


Out[14]:
('expens', 'moder', 'cheap')

Create Knowledge Graph


In [15]:
# Create a neo4j session
driver = GraphDatabase.driver('bolt://localhost:7687', auth=basic_auth('neo4j', 'neo4j'))

In [16]:
# WARNING: This will clear the database when run!
def reset_db():
    session = driver.session()
    session.run('MATCH (n) DETACH DELETE n')

In [17]:
reset_db()

In [18]:
session = driver.session()

for i,row in knowledge.iterrows():
    subject, relation, obj = row.restaurant, row.key, row.value
    session.run('''
        MERGE (s:SUBJECT {name: $subject}) 
        MERGE (o:OBJECT  {name: $obj}) 
        MERGE (s)-[r:RELATION {name: $relation}]->(o)
    ''', { 
        'subject': subject,
        'relation': relation,
        'obj': obj
    })

Test

Baseline

The baseline accuracy is the slot accuracy, calculated by the assumption of not knowing any frame values for any of the sentences.


In [19]:
dont_know = tuple(types.keys())
dont_know


Out[19]:
('r_cuisin', 'r_locat', 'r_price')

In [20]:
base_predicted = list(dont_know) * len(test)
base_actual = [w for frame in test.frame for w in frame]

In [21]:
accuracy_score(base_actual, base_predicted)


Out[21]:
0.45267489711934156

Accuracy


In [91]:
# Cache properties from DB
# Running this query will obtain all properties at this point in time
def get_properties():
    session = driver.session()
    return session.run('''
        MATCH ()-[r:RELATION]->(o:OBJECT) 
        RETURN collect(distinct o.name) AS properties
    ''').single()['properties']

In [92]:
# def get_types():
#     session = driver.session()
#     result = session.run('''
#         MATCH ()-[r:RELATION]->(o:OBJECT) 
#         RETURN collect(distinct [r.name, o.name]) AS pair
#     ''').single()[0]
    
#     g_types = defaultdict(lambda: [])
#     for k,v in result:
#         g_types[k].append(v)
#     return g_types

In [115]:
properties = set(get_properties())

In [116]:
# Hotword listener
def is_hotword(word):
    return word in properties

In [117]:
is_hotword('british'), is_hotword('python')


Out[117]:
(True, False)

In [122]:
# Issue DB queries
def find_slot(prop):
    return session.run('''
        MATCH (s:SUBJECT)-[r:RELATION]->(o:OBJECT {name:$name}) 
        RETURN collect(distinct [r.name, o.name]) AS properties
    ''', {
        'name': prop
    })

def extract(result):
    return result.single()['properties'][0]

In [123]:
session = driver.session()
extract(find_slot('west'))


Out[123]:
['r_locat', 'west']

In [183]:
session = driver.session()
all_slots = [[find_slot(word) for word in sentence if is_hotword(word)] for sentence in test.text]
extracted_slots = [[tuple(extract(slot)) for slot in slots] for slots in all_slots]
test['slots'] = extracted_slots

In [184]:
def to_frame(slots):
    frame = list(dont_know)
    s = dict(slots)
    
    for i,x in enumerate(frame):
        if x in s.keys():
            frame[i] = s[x]
    
    return tuple(frame)

In [185]:
test['predicted'] = [to_frame(slot) for slot in test.slots]

In [186]:
test[:5]


Out[186]:
text frame slots predicted
0 [i, want, a, moder, price, restaur, in, the, w... (r_cuisin, west, moder) [(r_price, moder), (r_locat, west)] (r_cuisin, west, moder)
1 [cheap, restaur, in, the, north, part, of, town] (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
2 [cheap, restaur, in, the, south, part, of, town] (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
3 [cheap, restaur, serv, indian, food] (indian, r_locat, cheap) [(r_price, cheap), (r_cuisin, indian)] (indian, r_locat, cheap)
4 [thai, food] (thai, r_locat, r_price) [(r_cuisin, thai)] (thai, r_locat, r_price)

In [173]:
predicted = [w for frame in test.predicted for w in frame]
actual = [w for frame in test.frame for w in frame]

In [187]:
accuracy_score(actual, predicted)


Out[187]:
0.96954732510288066

In [193]:
cm = nltk.ConfusionMatrix(actual, predicted)
print(cm.pretty_format(sort_by_count=True, show_percents=True, truncate=10))


         |      r                                                                |
         |      _      r             r                                           |
         |      c      _             _                                         e |
         |      u      l      c      p      m             n             s      x |
         |      i      o      h      r      o      e      o      w      o      p |
         |      s      c      e      i      d      a      r      e      u      e |
         |      i      a      a      c      e      s      t      s      t      n |
         |      n      t      p      e      r      t      h      t      h      s |
---------+-----------------------------------------------------------------------+
r_cuisin | <18.9%>     .      .      .      .      .      .      .      .      . |
 r_locat |      . <14.7%>     .      .      .      .      .   0.1%   0.6%      . |
   cheap |      .      . <11.9%>  0.2%      .      .      .      .      .      . |
 r_price |      .      .   0.1% <10.5%>  0.2%      .      .      .      .   0.1% |
   moder |      .      .      .   0.2%  <8.2%>     .      .      .      .      . |
    east |      .      .      .      .      .  <6.4%>     .      .      .      . |
   north |      .      .      .      .      .      .  <5.1%>     .      .      . |
    west |      .   0.2%      .      .      .      .      .  <3.3%>     .      . |
   south |      .      .      .      .      .      .      .      .  <3.0%>     . |
  expens |      .      .      .      .      .      .      .      .      .  <1.9%>|
---------+-----------------------------------------------------------------------+
(row = reference; col = test)


In [207]:
test[test.text.map(lambda s: 'cheap' in s)]


Out[207]:
text frame slots predicted
1 [cheap, restaur, in, the, north, part, of, town] (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
2 [cheap, restaur, in, the, south, part, of, town] (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
3 [cheap, restaur, serv, indian, food] (indian, r_locat, cheap) [(r_price, cheap), (r_cuisin, indian)] (indian, r_locat, cheap)
7 [im, look, for, a, cheap, restaur, in, the, no... (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
10 [cheap, restaur] (r_cuisin, r_locat, cheap) [(r_price, cheap)] (r_cuisin, r_locat, cheap)
12 [i, want, a, cheap, restaur, in, the, west, pa... (r_cuisin, west, cheap) [(r_price, cheap), (r_locat, west)] (r_cuisin, west, cheap)
14 [i, am, look, for, a, cheap, restaur, in, the,... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
15 [im, look, for, a, cheap, restaur, serv, inter... (intern, r_locat, cheap) [(r_price, cheap), (r_cuisin, intern)] (intern, r_locat, cheap)
17 [look, for, a, cheap, restaur, in, the, south,... (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
20 [look, for, someth, cheap, in, the, north, sid... (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
33 [im, look, for, a, cheap, restaur, in, the, so... (r_cuisin, r_locat, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
40 [i, want, a, cheap, restaur, in, the, south, p... (r_cuisin, r_locat, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
44 [i, need, a, cheap, restaur, in, the, south, p... (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
46 [i, want, a, cheap, restaur, in, the, east, pa... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
57 [breath, id, like, a, cheap, restaur, in, the,... (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
61 [im, look, for, a, cheap, restaur, in, the, we... (r_cuisin, west, cheap) [(r_price, cheap), (r_locat, west)] (r_cuisin, west, cheap)
62 [i, would, like, to, find, a, cheap, restaur, ... (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
68 [cheap, restaur, in, the, north, part, of, town] (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
71 [i, would, like, a, cheap, restaur, in, the, n... (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
72 [i, would, like, a, cheap, restaur, in, the, w... (r_cuisin, west, cheap) [(r_price, cheap), (r_locat, west)] (r_cuisin, west, cheap)
76 [im, look, for, a, cheap, restaur, in, the, we... (r_cuisin, west, cheap) [(r_price, cheap), (r_locat, west)] (r_cuisin, west, cheap)
82 [im, look, for, a, cheap, restaur, in, the, so... (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
83 [im, look, for, a, cheap, restaur, in, the, ea... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
84 [cheap, restaur, serv, spanish, food] (spanish, r_locat, cheap) [(r_price, cheap), (r_cuisin, spanish)] (spanish, r_locat, cheap)
86 [im, look, for, a, cheap, restaur, in, the, no... (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
89 [im, look, for, a, cheap, restaur, in, the, we... (r_cuisin, west, cheap) [(r_price, cheap), (r_locat, west)] (r_cuisin, west, cheap)
92 [im, look, for, a, cheap, restaur, in, the, ea... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
96 [i, need, a, cheap, restaur, in, the, west, pa... (r_cuisin, west, cheap) [(r_price, cheap), (r_locat, west)] (r_cuisin, west, cheap)
98 [im, look, for, a, cheap, restaur, in, the, no... (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
99 [im, look, for, a, cheap, restaur, in, the, no... (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
... ... ... ... ...
337 [look, for, a, cheap, restaur, in, the, east, ... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
340 [cheap, restaur, west, part, of, town] (r_cuisin, west, cheap) [(r_price, cheap), (r_locat, west)] (r_cuisin, west, cheap)
343 [im, look, for, a, cheap, restaur, and, it, sh... (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
346 [im, look, for, a, cheap, restaur, in, the, we... (r_cuisin, west, cheap) [(r_price, cheap), (r_locat, west)] (r_cuisin, west, cheap)
350 [cheap, restaur, south, part, of, town] (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
351 [iam, look, for, a, cheap, restaur, and, it, s... (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
352 [uh, i, want, a, cheap, restaur, and, it, shou... (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
353 [im, look, for, a, cheap, restaur, in, the, so... (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
354 [i, would, like, a, cheap, restaur, in, the, s... (r_cuisin, r_locat, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
356 [can, i, have, a, cheap, restaur, in, the, wes... (r_cuisin, west, cheap) [(r_price, cheap), (r_locat, west)] (r_cuisin, west, cheap)
358 [i, am, look, for, a, cheap, restaur, in, the,... (r_cuisin, west, cheap) [(r_price, cheap), (r_locat, west)] (r_cuisin, west, cheap)
360 [a, want, a, cheap, restaur, in, the, north, p... (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] (r_cuisin, north, cheap)
365 [cheap, restaur, in, th, east, part, of, town] (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
366 [im, look, for, a, cheap, restaur, and, it, sh... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
369 [cheap, restaur, in, th, east, part, of, town] (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
371 [cheap, restaur, in, the, east, part, of, town] (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
372 [im, look, for, a, cheap, restaur, in, the, ea... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
374 [look, for, someth, cheap, on, the, east, part... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
377 [im, look, for, a, cheap, restaur, serv, medit... (mediterranean, r_locat, cheap) [(r_price, cheap), (r_cuisin, mediterranean)] (mediterranean, r_locat, cheap)
378 [im, look, for, a, cheap, restaur, in, the, so... (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] (r_cuisin, south, cheap)
384 [im, look, for, a, cheap, restaur, that, serv,... (vietnames, r_locat, cheap) [(r_price, cheap), (r_cuisin, vietnames)] (vietnames, r_locat, cheap)
389 [im, look, for, a, cheap, restaur, in, the, ea... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
393 [im, look, for, a, cheap, restaur, in, the, ea... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
396 [cheap, restaur, east, part, of, town] (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
398 [look, for, a, cheap, restaur, in, the, east, ... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
400 [cheap, restaur, on, the, east, part, of, town] (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
406 [cheap, restaur, in, the, east, part, of, town] (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
410 [i, need, a, cheap, restaur, in, the, east, pa... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
412 [cheap, restaur, east, part, of, town] (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)
413 [im, look, for, a, cheap, restaur, in, the, ea... (r_cuisin, east, cheap) [(r_price, cheap), (r_locat, east)] (r_cuisin, east, cheap)

145 rows × 4 columns


In [202]:
test[test.text.map(lambda s: 'south' in s)]['text'][284]


Out[202]:
['id',
 'like',
 'a',
 'cheap',
 'restaur',
 'in',
 'the',
 'south',
 'part',
 'of',
 'town']

In [ ]: