In [190]:
import numpy as np
import pandas as pd
from nltk.corpus import framenet as fn
In [2]:
def get_lus(frame):
    """Return the lexeme part of every lexical-unit name in `frame`.

    Each key of ``frame['lexUnit']`` is cut at its first ``'.'`` and the
    text before it is kept (e.g. ``'run.v'`` -> ``'run'``). Keys without a
    dot are returned unchanged. Duplicates are preserved.
    """
    lexemes = []
    for lu_name in frame['lexUnit'].keys():
        lexeme, _, _ = lu_name.partition('.')
        lexemes.append(lexeme)
    return lexemes
In [3]:
# Every FrameNet frame whose name matches the catch-all pattern, plus the
# frame names in the same order.
all_frames = fn.frames('.*')
all_frame_names = [f.name for f in all_frames]

# Unique lexemes across all frames. Sorted so the column order of the
# matrices built from this list (and the CSV files written from them) is the
# same on every kernel run; plain set iteration order for strings is not
# stable between interpreter sessions.
all_lus = sorted({lu for f in all_frames for lu in get_lus(f)})
In [182]:
# Frame-by-lexeme count matrix: evoke.loc[frame_name, lexeme] is how many
# entries of get_lus(frame) equal that lexeme.
evoke = pd.DataFrame(0, index=all_frame_names, columns=all_lus)
for frame in all_frames:
    name = frame.name
    lus = get_lus(frame)
    for lu in lus:
        # One .loc call rather than chained `evoke[lu][name]`: chained
        # assignment may write to a temporary copy and leave `evoke` unchanged.
        evoke.loc[name, lu] += 1
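Most words appear at most once within any single frame; some appear twice and a few three times. (Note: `evoke.max()` is each word's largest per-frame count, not the number of distinct frames the word evokes.)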
In [5]:
# For each lexeme (column) take its largest count in any single frame, then
# tabulate how often each such maximum occurs.
evoke.max(axis=0).value_counts()
Out[5]:
In [6]:
# Preview the first five frames (rows) of the count matrix.
evoke.head(5)
Out[6]:
In [77]:
def evokes(frame):
    """Return words that evoke `frame`.

    These are the lexemes of the frame's own lexical units, i.e. exactly what
    `get_lus` extracts, so the work is delegated to that helper instead of
    repeating its body. Duplicates are preserved.
    """
    return get_lus(frame)
In [38]:
def is_inheritance_relation(relation):
    """Return True when the name of `relation`'s type is 'Inheritance'."""
    relation_type = relation['type']
    return relation_type['name'] == 'Inheritance'
In [101]:
def is_parent_frame(frame, relation):
    """Return True when `frame` is the super-frame of `relation`.

    The comparison is by name: `frame.name` against
    `relation.superFrameName`.
    """
    own_name = frame.name
    parent_name = relation.superFrameName
    return own_name == parent_name
In [146]:
def children(frame):
    """Return children of `frame`.

    A child is the sub-frame of any 'Inheritance' relation listed on `frame`
    in which `frame` itself is the super-frame. Each child is looked up by
    name through `fn.frame`.
    """
    kids = []
    for rel in frame.frameRelations:
        if not is_inheritance_relation(rel):
            continue
        if not is_parent_frame(frame, rel):
            continue
        kids.append(fn.frame(rel.subFrameName))
    return kids
In [104]:
def flatten(lst):
    """Concatenate the items of every element of `lst` into one list.

    Only one level of nesting is removed.
    """
    flat = []
    for sublist in lst:
        flat.extend(sublist)
    return flat
In [147]:
def words(frame):
    """Return all words that evoke `frame`, including words that
    evoke frames that inherit from `frame`.

    The frame's own words come first, followed by the words of each child
    (and, recursively, of that child's descendants) in the order `children`
    returns them.
    """
    # NOTE(review): a descendant reachable through more than one child would
    # contribute its words once per path -- confirm whether that is intended.
    collected = list(evokes(frame))
    for child in children(frame):
        collected.extend(words(child))
    return collected
In [183]:
# Frame-by-lexeme count matrix that also credits a frame with the words of
# the frames inheriting from it: relations.loc[frame_name, lexeme] is the
# number of times the lexeme occurs in words(frame).
relations = pd.DataFrame(data=0, index=all_frame_names, columns=all_lus)
for frame in all_frames:
    name, lus = frame.name, words(frame)
    for lu in lus:
        relations.loc[name, lu] += 1
In [187]:
# Preview the first five frames (rows) of the inheritance-aware matrix.
relations.head(5)
Out[187]:
In [196]:
# Sparsity: the fraction of cells in `relations` that are zero.
np.count_nonzero(relations.values == 0) / relations.size
Out[196]:
In [205]:
# Frames with the largest total word count (row sums), highest first.
(
    relations
    .sum(axis="columns")
    .sort_values(ascending=False)
    .head()
)
Out[205]:
In [211]:
# Lexemes with the highest counts in the 'Transitive_action' row.
(
    relations
    .loc['Transitive_action']
    .sort_values(ascending=False)
    .head()
)
Out[211]:
In [209]:
# Write the raw count matrix to a CSV file in the working directory.
relations.to_csv(path_or_buf='framenet-relations.csv')
In [226]:
# Divide every column (lexeme) by its column total so that each column sums
# to 1, then write the normalized matrix to its own CSV file.
normalized_relations = relations / relations.sum(axis=0)
normalized_relations.to_csv(path_or_buf='framenet-normalized-relations.csv')