In [1]:
import pandas as pd
In [2]:
corpora_path = 'dialog-bAbI-tasks'
In [3]:
from gensim.models import KeyedVectors
# load the pretrained GoogleNews embeddings (KeyedVectors is the current loader;
# Word2Vec.load_word2vec_format was removed from gensim)
w2v = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)
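# note (an assumption about your resources, not in the original run): if memory
# is tight, load_word2vec_format also accepts limit=N to read only the N most
# frequent vectors and cut load time/RAM.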
In [3]:
vec_size = len(w2v['red'])
vec_size
In [4]:
import os
files = os.listdir(corpora_path)
files
Out[4]:
In [20]:
data_dict = dict()
for f in files:
    if 'candidates' in f: continue  # skip the candidate-answer files
    data_dict[f] = pd.read_csv(os.path.join(corpora_path, f), names=['text','bot'], delimiter='\t')
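In [ ]:
# Format note (hedged, from the bAbI dialog release): each line of a task file
# is "turn-index user-utterance<TAB>bot-utterance", so read_csv above yields one
# row per turn, with the turn index still glued onto the user text.
# Toy illustration (invented utterances):
pd.DataFrame({'text': ['1 hello', '2 may i have italian food'],
              'bot': ['hello what can i help you with today', 'api_call italian north cheap']})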
In [ ]:
In [21]:
task = 'task6'
task_data = [x for x in data_dict if task in x]
task_data
Out[21]:
In [22]:
train_data = data_dict[[x for x in task_data if 'trn' in x][0]]
dev_data = data_dict[[x for x in task_data if 'dev' in x][0]]
test_data = data_dict[[x for x in task_data if 'tst' in x][0]]
train_data['o'] = 'trn'
dev_data['o'] = 'dev'
test_data['o'] = 'tst'
c = pd.concat((train_data, dev_data, test_data))
c.index = range(len(c))
#c = c[~c['text'].str.contains("<SILENCE>")] # get rid of <SILENCE> markers
c = c.fillna("<unk>")
c['ind'] = c.text.map(lambda x: x.split()[0]) # split out the index into another column
c['text'] = c.text.map(lambda x: x.split()[1:])
# assign a dialogue (group) id: the turn index restarts at '1' for each new dialogue
gid = []
j = 1
for i in c.ind:
    if i == '1': j += 1
    gid.append(j)
c['gid'] = gid
# mark each turn with 'True' until the dialogue's first api_call has been seen
mask = []
has_api_call = 'True'
for i, r in c.iterrows():
    if r.ind == '1': has_api_call = 'True'
    if "api_call" in r.bot:
        mask += ['True']
        has_api_call = 'False'
        continue
    mask += [has_api_call]
c['mask'] = mask
#c = c.drop(['ind'],axis=1)
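In [ ]:
# Hedged aside (equivalent, not part of the original run): the gid loop above
# can be written as one vectorized cumulative sum over the "new dialogue" marker.
assert (c.gid == (c.ind == '1').cumsum() + 1).all()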
In [23]:
d = c.copy()
In [24]:
c[:100]
Out[24]:
In [ ]:
In [25]:
restaurants = c[c.bot.str.contains('<unk>')]  # KB triples have no bot turn, so bot is '<unk>' after the fillna above
restaurants = restaurants.text.apply(lambda x: pd.Series(x))
restaurants.columns = ['rname', 'attr_key', 'attr_value']
restaurants = restaurants.drop_duplicates()
restaurants = restaurants[restaurants.rname != 'api_key']
restaurants = restaurants[restaurants.rname != 'ask']
restaurants = restaurants[restaurants.attr_key != 'no']
attrs = ['R_cuisine', 'R_location', 'R_price']
restaurants = restaurants[restaurants.attr_key.isin(attrs)]
restaurants.to_pickle('restaurants_props.pkl')
"number of restaurants:", len(set(restaurants.rname))
Out[25]:
In [123]:
restaurants[restaurants.rname == 'cote']
Out[123]:
In [11]:
cols = list(set(restaurants.attr_key))
# wide format: one row per restaurant, one column per attribute
r = restaurants.pivot_table('attr_value', ['rname'], 'attr_key', aggfunc=lambda x: list(set(x))[0])
r = pd.get_dummies(data=r, columns=cols)
r['rname'] = r.index
c = r.columns.tolist()  # note: reuses the name c; the corpus frame is restored from d below
c.insert(0, c.pop(c.index('rname')))  # move rname to the front
r = r.reindex(columns=c)
r.to_pickle('restaurants.pkl')
r[:10]
Out[11]:
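In [ ]:
# Hedged illustration (toy data, not from the corpus): the pivot_table +
# get_dummies pattern above turns long-format (rname, attr_key, attr_value)
# triples into one one-hot row per restaurant.
toy = pd.DataFrame({'rname': ['a', 'a', 'b', 'b'],
                    'attr_key': ['R_price', 'R_location'] * 2,
                    'attr_value': ['cheap', 'north', 'expensive', 'south']})
toy = toy.pivot_table('attr_value', ['rname'], 'attr_key', aggfunc=lambda x: list(set(x))[0])
pd.get_dummies(toy, columns=['R_location', 'R_price'])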
In [12]:
restaurants = r
restaurants[:10]
Out[12]:
In [13]:
c = d.copy()
In [14]:
suggested = c[c.text.apply(str).str.contains('<SILENCE>') | c.text.apply(str).str.contains('api_call')].copy()  # .copy() so the assignments below do not warn
suggested.loc[suggested.text.apply(str).str.contains('api_call'), 'bot'] = "no_result"
print(len(suggested[suggested.bot == 'no_result']))
suggested['target'] = suggested.bot.map(lambda x: x.split()[0])
possible_targets = list(set(restaurants.rname)) + ['no_result']
suggested = suggested[suggested.target.isin(possible_targets)]
#suggested = suggested[~suggested.bot.str.contains('api_call')]
suggested.drop(['text','bot', 'ind', 'mask'], axis=1, inplace=True)
# dropping duplicates means we only care about the first api_call in the dialogue
suggested = suggested.drop_duplicates(subset=['o','gid'])
len(suggested)
Out[14]:
In [15]:
suggested
Out[15]:
In [16]:
c = d.copy()
In [17]:
import numpy as np
c = c[~c.text.apply(str).str.contains('<SILENCE>')]
c = c[~c.bot.apply(str).str.contains('<unk>')]
c.loc[~c.bot.apply(str).str.contains('api_call'), 'bot'] = ""
# need a bit of discourse history
#c['text1'] = c.text.shift(1)
#c['text2'] = c.text.shift(2)
#c['text3'] = c.text.shift(3)
#c.dropna(subset=['text', 'text1'], inplace=True)
#c['text'] = c.text2.map(list) + c.text1.map(list) + c.text.map(list)
#c['text'] = c.text1.map(list) + c.text.map(list)
#c.drop(['text1'], axis=1, inplace=True)
# dropping duplicates means we only care about the first api_call in the dialogue
#c = c.drop_duplicates(subset=['o','gid'])
#
#c = pd.merge(c, suggested, on=['o','gid'], how='left')
c = c[c['mask'].str.contains('True')]  # keep only turns before the first api_call
c = c.groupby('gid').agg(sum)  # per dialogue: sum concatenates the token lists (and the mask strings)
c = c[c['mask'] == 'True']  # a k-row group's mask becomes 'True' * k, so this keeps dialogues with exactly one such turn
c['gid'] = c.index
c = pd.merge(c, suggested, on=['o','gid'], how='left')
c.dropna(inplace=True)
len(c)
Out[17]:
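In [ ]:
# Hedged aside (toy data, not part of the original run): groupby + sum on
# object columns concatenates, which is what the cell above relies on: token
# lists are joined per dialogue and the 'True' strings concatenate per row.
toy = pd.DataFrame({'gid': [1, 1, 2], 'text': [['a'], ['b'], ['c']], 'mask': ['True'] * 3})
toy.groupby('gid').agg(sum)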
In [18]:
c.to_pickle('utts_refs.pkl')
In [19]:
c[:5]
Out[19]:
In [ ]:
In [133]:
# make the text (which is a list of words) into a single column of words
s = c.text.apply(lambda x: pd.Series(x)).stack().reset_index(level=1, drop=True)
s.name = 'word'
c = c.drop('text', axis=1).join(s)
c.dropna(subset=['bot'], inplace=True)
#c['w2v'] = c.text.map(lambda x: [w2v[i] for i in x if i in w2v])
c = c[~c.word.apply(str).str.contains('_')]  # drop special tokens (e.g. R_... placeholders)
c['w2v'] = c.word.map(lambda x: w2v[x] if x in w2v else np.zeros(vec_size))
attr_df = c.bot.apply(lambda x: pd.Series(x.split()))  # bot turn is "api_call <cuisine> <location> <price>"
c['type'], c['loc'], c['price'] = attr_df[1], attr_df[2], attr_df[3]
c = pd.get_dummies(data = c, columns = ['type','loc','price'] )
data = c
len(data)
Out[133]:
In [134]:
train_data = data[data.o == 'trn'].drop(['o'], axis=1)
dev_data = data[data.o == 'dev'].drop(['o'], axis=1)
test_data = data[data.o == 'tst'].drop(['o'], axis=1)
train_data.shape, test_data.shape, dev_data.shape
Out[134]:
In [135]:
train_data[:3]
Out[135]:
In [136]:
start_col = 'type_R_cuisine'
In [137]:
import numpy as np
y_train = train_data.loc[:, start_col:].to_numpy()
X_train = train_data.w2v.to_numpy()
X_train = np.array(list(X_train), dtype=float)  # stack the per-word vectors into a 2-d matrix for the regression model
X_train.shape, y_train.shape
Out[137]:
In [138]:
from sklearn.linear_model import Ridge
import numpy as np
model = Ridge(alpha=0.1, normalize=True, tol=0.01)  # other arguments left at their defaults
# (on sklearn >= 1.2 the normalize flag is gone; standardize X with a StandardScaler pipeline instead)
model.fit(X_train, y_train)
Out[138]:
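In [ ]:
# Hedged sanity check (illustrative, not part of the original run): the ridge
# model maps a 300-d word embedding onto the one-hot attribute columns, so a
# cuisine word should put most of its weight on the cuisine slots.
model.predict(w2v['italian'].reshape(1, -1))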
In [139]:
# word-level composition: predict an attribute vector per word, then compose
def compute_target(utt):
    predictions = [model.predict(v.reshape(1, -1))[0] for v in utt.w2v]
    return compose(predictions)
In [140]:
#
# composition by union of vectors
#
def compose(predictions):
    p = predictions[0]
    for i in predictions[1:]:
        p = np.logical_or(i, p)
    return p
In [141]:
# attribute-level composition: compose the word vectors first, then predict once
# (note: this cell and the next override the word-level pair defined above)
def compute_target(utt):
    predictions = utt.w2v.values
    p = compose(predictions)
    return model.predict(p.reshape(1, -1))[0]  # [0] flattens to 1-d for the cosine below
In [142]:
#
# this is the composition function, it just sums vectors
#
def compose(predictions):
    p = predictions[0]
    for i in predictions[1:]:
        p = np.sum((i, p), axis=0)
    return p
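In [ ]:
# Hedged illustration (toy vectors, not part of the original run): the two
# composition strategies side by side. Union keeps any slot that fired for
# either word; summing accumulates evidence per slot.
a, b = np.array([1., 0., 0.]), np.array([1., 1., 0.])
np.logical_or(a, b), np.sum((a, b), axis=0)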
In [143]:
targets = train_data.drop(['bot', 'word', 'w2v', 'target', 'gid', 'mask', 'ind'], axis=1)
targets = targets.drop_duplicates()
#targets = targets.loc[:, start_col:].to_numpy()
train_data.shape, targets.shape
targets[:10]
Out[143]:
In [144]:
import operator
import scipy
import sklearn
eval_data = dev_data
gold = []
guess = []
indices = list(set(eval_data.index))
print('num instances', len(indices))
for eid in indices:
    sub = eval_data[eval_data.index == eid]  # grab the RE for this scene
    target = compute_target(sub)  # compose the per-word predictions into a target vector
    distances = [(v, scipy.spatial.distance.cosine(target, v.loc[start_col:])) for i, v in targets.iterrows()]
    guess += [min(distances, key=operator.itemgetter(1))[0]]  # which object has the shortest distance?
    gold += [sub.iloc[-1].loc[start_col:]]  # all the rows in sub share the same target vector
In [145]:
#
guess = np.array(guess, dtype=float)
gold = np.array(gold, dtype=float)
sklearn.metrics.f1_score(gold, guess, average='micro', labels=np.array([0, 1], dtype=float))
Out[145]:
In [ ]:
In [146]:
from collections import defaultdict as dd
incr_results = dd(list)
filled_slots = dd(list)
for eid in indices:
    pre_sub = eval_data[eval_data.index == eid]  # grab the RE for this scene
    for i in range(1, len(pre_sub)):
        sub = pre_sub[:i]  # incremental prefix: the first i words of the RE
        gold = []
        guess = []
        target = compute_target(sub)  # compose the per-word predictions into a target vector
        distances = [(v, scipy.spatial.distance.cosine(target, v.loc[start_col:])) for j, v in targets.iterrows()]
        guess += [min(distances, key=operator.itemgetter(1))[0]]  # which object has the shortest distance?
        gold += [sub.iloc[-1].loc[start_col:]]  # all the rows in sub share the same target vector
        filled_slots[i].append(np.sum(guess))  # how many slots the guess fills
        guess = np.array(guess, dtype=float)
        gold = np.array(gold, dtype=float)
        incr_results[i].append(sklearn.metrics.f1_score(gold, guess, average='micro', labels=np.array([0, 1], dtype=float)))
In [147]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
results = [(i, np.mean(incr_results[i])) for i in sorted(incr_results)]
r = list(zip(*results))
plt.xlabel('prefix length (words)')
plt.ylabel('mean micro F1')
plt.plot(r[0], r[1])
Out[147]:
In [ ]:
In [148]:
rdata = data[['ind','gid','target','word','w2v','o']]
In [149]:
rdata[:10]
Out[149]:
In [150]:
restaurants[:5]
Out[150]:
In [158]:
num_cols = len(restaurants.columns) -1
start_col = 'R_price_cheap'
num_cols
Out[158]:
In [159]:
rdata = rdata[rdata.target.isin(restaurants.rname)].copy()
rdata['attrvec'] = rdata.target.map(lambda x: restaurants.loc[restaurants.rname == x, start_col:].to_numpy()[0])
In [160]:
rdata[:5]
Out[160]:
In [161]:
train_data = rdata[rdata.o == 'trn'].drop(['o'], axis=1)
dev_data = rdata[rdata.o == 'dev'].drop(['o'], axis=1)
test_data = rdata[rdata.o == 'tst'].drop(['o'], axis=1)
train_data.shape, test_data.shape, dev_data.shape
Out[161]:
In [162]:
import numpy as np
y_train = np.array(list(train_data.attrvec))  # stacking by hand; .to_numpy() alone leaves an object array with the wrong shape
X_train = train_data.w2v.to_numpy()
X_train = np.array(list(X_train), dtype=float)  # needed to fit the regression model
X_train.shape
Out[162]:
In [ ]:
In [163]:
from sklearn.linear_model import Ridge
import numpy as np
model = Ridge(alpha=0.1, normalize=True, tol=0.01)  # same settings as the attribute model above
model.fit(X_train, y_train)
Out[163]:
In [164]:
def compute_mrr(lst, target):
    # reciprocal rank of target within the ranked list lst
    # (returns 1/(len(lst)+1) if target is absent)
    i = 1.0
    for l in lst:
        if l == target: break
        i += 1
    return 1.0 / i
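In [ ]:
# Quick check (illustrative restaurant names, not part of the original run):
# the target at rank 2 gives a reciprocal rank of 0.5.
compute_mrr(['prezzo', 'cote', 'nandos'], 'cote')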
In [165]:
import operator
import scipy
import sklearn
eval_data = dev_data
gold = []
guess = []
indices = list(set(eval_data.index))
print('num instances', len(indices))
mrr = 0.0
for eid in indices:
    sub = eval_data[eval_data.index == eid]  # grab the RE for this scene
    target = compute_target(sub)  # compose the per-word predictions into a target vector
    distances = [(v['rname'], scipy.spatial.distance.cosine(target, v.loc[start_col:])) for i, v in restaurants.iterrows()]
    distances.sort(key=operator.itemgetter(1))
    guess += [distances[0][0]]  # the closest restaurant
    d = list(zip(*distances))[0]
    mrr += compute_mrr(d, sub.iloc[-1].loc['target'])
    gold += [sub.iloc[-1].loc['target']]  # all the rows in sub share the same target
In [166]:
sklearn.metrics.accuracy_score(gold, guess)
Out[166]:
In [167]:
mrr / len(gold)  # mean reciprocal rank
Out[167]:
In [168]:
import operator
import scipy
import sklearn
eval_data = dev_data
indices = list(set(eval_data.index))
print('num instances', len(indices))
incr_results = dd(list)  # reset: the f1 loop above wrote into the same dict
mrr = 0.0
for eid in indices:
    pre_sub = eval_data[eval_data.index == eid]  # grab the RE for this scene
    for i in range(1, len(pre_sub)):
        sub = pre_sub[:i]  # incremental prefix: the first i words of the RE
        gold = []
        guess = []
        target = compute_target(sub)  # compose the per-word predictions into a target vector
        distances = [(v['rname'], scipy.spatial.distance.cosine(target, v.loc[start_col:])) for j, v in restaurants.iterrows()]
        distances.sort(key=operator.itemgetter(1))
        guess += [distances[0][0]]  # the closest restaurant
        d = list(zip(*distances))[0]
        mrr += compute_mrr(d, sub.iloc[-1].loc['target'])
        gold += [sub.iloc[-1].loc['target']]  # all the rows in sub share the same target
        incr_results[i].append(sklearn.metrics.accuracy_score(gold, guess))
In [169]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
results = [(i, np.mean(incr_results[i])) for i in sorted(incr_results)]
r = list(zip(*results))
plt.xlabel('prefix length (words)')
plt.ylabel('mean accuracy')
plt.plot(r[0], r[1])
Out[169]:
In [170]:
#eval_data['w2v'] = eval_data.word.map(lambda x: w2v[w2v.most_similar([x], topn=1)[0][0]] if x in w2v else np.zeros(vec_size))
In [171]:
# this returns it back to normal:
#eval_data['w2v'] = eval_data.word.map(lambda x: w2v[x] if x in w2v else np.zeros(vec_size))
In [172]:
import pickle
# now you can save it to a file
with open('ridge_restaurant.pkl', 'wb') as f:
    pickle.dump(model, f)
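In [ ]:
# Usage sketch (not part of the original run): reload the pickled model later.
with open('ridge_restaurant.pkl', 'rb') as f:
    model = pickle.load(f)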
In [ ]: