In [ ]:
from __future__ import division
import codecs
import pickle
import networkx as nx
from collections import Counter
from scipy.stats import kurtosis, skew, variation
# NOTE(review): `rcParams` is not imported in the visible cells; presumably it
# comes from a pylab-style namespace (e.g. the `%pylab` magic) -- confirm.
# Default size, as (width, height), of every figure created below.
rcParams['figure.figsize'] = (12.0, 10.0)
# Font family used for all text drawn in the figures.
rcParams['font.family'] = 'Times New Roman'
In [ ]:
from os.path import abspath, dirname

# Parent of the current working directory; every data and result path below
# is built relative to it.  `dirname` is used instead of splitting on '/'
# by hand so that the result is still an absolute path (e.g. '/') when the
# notebook is started one level below the filesystem root, and so that the
# platform's own path separator is honoured.
workspace = dirname(abspath('.'))
Note: Make sure that your workspace sees the root directory of openie_eval.
In [ ]:
# Project modules.  Neither `sp` nor `ontologization` is referenced by the
# cells visible in this part of the notebook.
from openie_eval.openie_eval import semantic_parsing as sp
from openie_eval.openie_eval import ontologization
# Re-execute the already imported project modules so that edits made to them
# on disk take effect without restarting the interpreter (Python 2 builtin).
reload(sp)
reload(ontologization)
from nltk.stem import WordNetLemmatizer
# Single lemmatizer instance shared by the cells below, which use it to
# lemmatize relation phrases as verbs (pos='v').
lemmatizer = WordNetLemmatizer()
In [ ]:
# Data set to analyse; used as a directory name in both the input paths
# (relations pickles) and the output paths (saved figures).
#keyword = 'carnatic_music'
keyword = 'hindustani_music'
# Suffix inserted into the relations pickle file name
# ('relations' + coref_suffix + '.pickle').
# NOTE(review): presumably selects the coreference-resolved extractions -- confirm.
#coref_suffix = ''
coref_suffix = '-coref'
# Not read by any of the cells visible in this part of the notebook.
filtered_suffix = ''
#filtered_suffix = '-filtered'
In [ ]:
# One row per extraction system:
#   (data directory name, legend label, line colour).
# The three parallel lists used by the plotting cells are derived from this
# table so that the entries cannot drift out of step.
_systems = [
    ('reverb', 'ReVerb', '#990033'),
    ('openie', 'OpenIE 4.0', '#006600'),
    ('semantic-parsing', 'Sem. Parsing', '#330066'),
]
methods, labels, colors = [list(column) for column in zip(*_systems)]
In [ ]:
# Quick visual check of the palette: draw one straight line per colour,
# with slopes 1, 2, 3, ... so the lines do not overlap.
x = arange(0, 100, 1)
for i, line_color in enumerate(colors):
    y = x * (i + 1)
    plot(x, y, color=line_color)
In [ ]:
def get_sentence_relations(relations):
    """Group extracted relations by the sentence they were extracted from.

    Args:
        relations: iterable of dicts, each providing the string values
            'full_sentence', 'arg1', 'rel' and 'arg2'.

    Returns:
        dict mapping each distinct 'full_sentence' to a list with one entry
        per relation extracted from it, in input order.  Each entry is the
        plain concatenation arg1 + rel + arg2 (no separator is inserted).

    Raises:
        KeyError: if a relation lacks one of the four keys.
    """
    sentence_relations = {}
    for rel in relations:
        triple = rel['arg1'] + rel['rel'] + rel['arg2']
        # setdefault needs a single hash lookup.  Testing membership against
        # `.keys()` builds and scans a list under Python 2, which made the
        # grouping quadratic in the number of sentences.
        sentence_relations.setdefault(rel['full_sentence'], []).append(triple)
    return sentence_relations
In [ ]:
figure()
for count in xrange(len(methods)):
method = methods[count]
input_file = workspace+'/data/'+method+'/' + keyword + '/relations'+coref_suffix+'.pickle'
relations = pickle.load(file(input_file))
sentence_relations = get_sentence_relations(relations)
n_extractions = [len(v) for v in sentence_relations.values()]
x = arange(1, 30, 1)
y = [sum(n_extractions >= i) for i in x]
log_y = log10(y)
log_y[isinf(log_y)] = 0
# n_uniq_extractions = [len(set(v)) for v in sentence_relations.values()]
# uniq_y = [sum(n_extractions >= i) for i in x]
# log_uniq_y = log10(uniq_y)
# log_uniq_y[isinf(log_uniq_y)] = 0
plot(x, log_y, '-', color=colors[count], marker='.', label=labels[count], linewidth=2.5)
# plot(x, log_uniq_y, '--', color=colors[count], marker='.', label=labels[count])
hold(True)
_mean = mean(n_extractions)
_variance = variation(n_extractions)
print method, _mean, _variance
legend()
grid()
In [ ]:
# Axis labels, ranges, legend and tick sizes for the
# extractions-per-sentence figure.
fontsize = 32
axis_label_size = fontsize + 2
xlabel('Min. no. of extractions', fontsize=axis_label_size)
ylabel('Log. count of sentences', fontsize=axis_label_size)
xlim(1, 15)
ylim(0.69, 3.95)
legend(loc=1, prop=dict(size=fontsize))
for set_ticks in (xticks, yticks):
    set_ticks(fontsize=fontsize)
In [ ]:
# Write the current figure next to the other quantitative results, once as
# PDF and once as PNG, with identical rendering options.
# NOTE: 'extrations' (sic) is kept verbatim so the output path is unchanged.
fname = workspace+'/data/results/quantitative/'+keyword+'/extrations-per-sentence'
save_options = dict(
    dpi=200, facecolor='w', edgecolor='w', orientation='landscape',
    papertype=None, format=None, transparent=False,
    bbox_inches='tight', pad_inches=0.1,
)
for extension in ('.pdf', '.png'):
    savefig(fname + extension, **save_options)
In [ ]:
from collections import Counter
In [ ]:
def get_uniq_relations(relations):
    """Return the distinct (arg1, rel, arg2) triples, first occurrence first.

    Two relations count as duplicates when their arg1, rel and arg2 joined
    with single spaces give the same string.

    Args:
        relations: iterable of dicts, each providing the string values
            'arg1', 'rel' and 'arg2'.

    Returns:
        list of three-element lists [arg1, rel, arg2], one per distinct
        relation, in order of first appearance.

    Raises:
        KeyError: if a relation lacks one of the three keys.
    """
    uniq_relations = []
    # A set gives constant-time membership tests; the list used previously
    # made de-duplication quadratic in the number of relations.
    seen_relphrases = set()
    for rel in relations:
        relation = [rel['arg1'], rel['rel'], rel['arg2']]
        relphrase = " ".join(relation)
        if relphrase not in seen_relphrases:
            seen_relphrases.add(relphrase)
            uniq_relations.append(relation)
    return uniq_relations
In [ ]:
fig = figure()
ax = fig.add_subplot(1,1,1)
for count in xrange(len(methods)):
method = methods[count]
input_file = workspace+'/data/'+method+'/' + keyword + '/relations'+coref_suffix+'.pickle'
#all relations
relations = pickle.load(file(input_file))
arguments = [rel['arg1'] for rel in relations]
counter = Counter(arguments)
n_extractions = array(counter.values())
x = concatenate((arange(0, 100, 20), arange(100, 1501, 50)))
y = [sum(n_extractions >= i) for i in x]
log_y = log10(y)
log_y[isinf(log_y)] = 0
#log_y = log_y/max(log_y)
plot(x, log_y, '-', color=colors[count], marker='.', label=labels[count], linewidth=2.5)
_mean = mean(n_extractions)
_variance = variation(n_extractions)
print method, _mean, _variance
print y
#uniq relations
relations = get_uniq_relations(relations)
arguments = [rel[0] for rel in relations]
counter = Counter(arguments)
n_extractions = array(counter.values())
x = concatenate((arange(0, 100, 20), arange(100, 1501, 50)))
x[0] = 1
y = [sum(n_extractions >= i) for i in x]
log_y = log10(y)
log_y[isinf(log_y)] = 0
#log_y = log_y/max(log_y)
plot(x, log_y, '--', color=colors[count], marker='.', linewidth=2.5)
_mean = mean(n_extractions)
_variance = variation(n_extractions)
print method, _mean, _variance
print y
grid()
In [ ]:
# Axis labels, range, legend and tick sizes for the
# extractions-per-argument figure.
fontsize = 32
axis_label_size = fontsize + 2
xlim(1, 350)
xlabel('Min. no. of extractions', fontsize=axis_label_size)
ylabel('Log. count of entities', fontsize=axis_label_size)

# Per-system legend entries already attached to the axes.
handles, _labels = ax.get_legend_handles_labels()
# Two black proxy lines explain the solid / dashed line styles.
custom_artists = [
    plt.Line2D((0,1),(0,0), color='k', linestyle=dash_style)
    for dash_style in ('-', '--')
]
ax.legend(
    handles + custom_artists,
    _labels + ['All', 'Unique'],
    loc='upper right',
    prop={'size': fontsize},
)
for set_ticks in (xticks, yticks):
    set_ticks(fontsize=fontsize)
In [ ]:
# Write the current figure next to the other quantitative results, once as
# PDF and once as PNG, with identical rendering options.
# NOTE: 'extrations' (sic) is kept verbatim so the output path is unchanged.
fname = workspace+'/data/results/quantitative/'+keyword+'/extrations-per-argument'
save_options = dict(
    dpi=200, facecolor='w', edgecolor='w', orientation='landscape',
    papertype=None, format=None, transparent=False,
    bbox_inches='tight', pad_inches=0.1,
)
for extension in ('.pdf', '.png'):
    savefig(fname + extension, **save_options)
In [ ]:
fig = figure()
ax = fig.add_subplot(1,1,1)
for count in xrange(len(methods)):
method = methods[count]
label = labels[count]
input_file = workspace+'/data/'+method+'/' + keyword + '/relations'+coref_suffix+'.pickle'
relations = pickle.load(file(input_file))
relation_types = [lemmatizer.lemmatize(rel['rel'], pos='v') for rel in relations]
counter = Counter(relation_types)
n_extractions = array(counter.values())
x = concatenate((arange(0, 100, 20), arange(100, 1201, 50)))
y = [sum(n_extractions >= i) for i in x]
log_y = log10(y)
log_y[isinf(log_y)] = 0
#log_y = log_y/max(log_y)
plot(x, log_y, '-', color=colors[count], marker='.', label=labels[count], linewidth=2.5)
_mean = mean(n_extractions)
_variance = variation(n_extractions)
print method, _mean, _variance
relations = get_uniq_relations(relations)
relation_types = [lemmatizer.lemmatize(rel[1], pos='v') for rel in relations]
counter = Counter(relation_types)
n_extractions = array(counter.values())
x = concatenate((arange(0, 100, 20), arange(100, 1201, 50)))
y = [sum(n_extractions >= i) for i in x]
log_y = log10(y)
log_y[isinf(log_y)] = 0
#log_y = log_y/max(log_y)
plot(x, log_y, '--', color=colors[count], marker='.', linewidth=2.5)
_mean = mean(n_extractions)
_variance = variation(n_extractions)
print method, _mean, _variance
grid()
In [ ]:
# Axis labels, range, legend and tick sizes for the
# extractions-per-relation-type figure.
fontsize = 32
axis_label_size = fontsize + 2
xlim(1, 500)
xlabel('Min. no. of extractions', fontsize=axis_label_size)
ylabel('Log. count of relation types', fontsize=axis_label_size)

# Per-system legend entries already attached to the axes.
handles, _labels = ax.get_legend_handles_labels()
# Two black proxy lines explain the solid / dashed line styles.
custom_artists = [
    plt.Line2D((0,1),(0,0), color='k', linestyle=dash_style)
    for dash_style in ('-', '--')
]
ax.legend(
    handles + custom_artists,
    _labels + ['All', 'Unique'],
    loc='upper right',
    prop={'size': fontsize},
)
for set_ticks in (xticks, yticks):
    set_ticks(fontsize=fontsize)
In [ ]:
# Write the current figure next to the other quantitative results, once as
# PDF and once as PNG, with identical rendering options.
# NOTE: 'extrations' (sic) is kept verbatim so the output path is unchanged.
fname = workspace+'/data/results/quantitative/'+keyword+'/extrations-per-reltype'
save_options = dict(
    dpi=200, facecolor='w', edgecolor='w', orientation='landscape',
    papertype=None, format=None, transparent=False,
    bbox_inches='tight', pad_inches=0.1,
)
for extension in ('.pdf', '.png'):
    savefig(fname + extension, **save_options)
In [ ]:
fig = figure()
ax = fig.add_subplot(1,1,1)
for count in xrange(len(methods)):
method = methods[count]
input_file = workspace+'/data/'+method+'/' + keyword + '/relations'+coref_suffix+'.pickle'
#all relations
relations = pickle.load(file(input_file))
classes = []
for r in relations:
if lemmatizer.lemmatize(r['rel'], pos='v') == 'is a' or lemmatizer.lemmatize(r['rel'], pos='v') == 'be':
classes.append(r['arg2'])
counter = Counter(classes)
n_extractions = array(counter.values())
x = concatenate((arange(0, 100, 20), arange(100, 501, 50)))
y = [sum(n_extractions >= i) for i in x]
log_y = log10(y)
log_y[isinf(log_y)] = 0
#log_y = log_y/max(log_y)
plot(x, log_y, '-', color=colors[count], marker='.', label=labels[count], linewidth=2.5)
_mean = mean(n_extractions)
_variance = variation(n_extractions)
print method, _mean, _variance
print y
#uniq relations
relations = get_uniq_relations(relations)
classes = []
for r in relations:
if lemmatizer.lemmatize(r[1], pos='v') == 'is a' or lemmatizer.lemmatize(r[1], pos='v') == 'be':
classes.append(r[2])
counter = Counter(classes)
n_extractions = array(counter.values())
x = concatenate((arange(0, 100, 20), arange(100, 501, 50)))
x[0] = 1
y = [sum(n_extractions >= i) for i in x]
log_y = log10(y)
log_y[isinf(log_y)] = 0
#log_y = log_y/max(log_y)
plot(x, log_y, '--', color=colors[count], marker='.', linewidth=2.5)
_mean = mean(n_extractions)
_variance = variation(n_extractions)
print method, _mean, _variance
print y
grid()
In [ ]:
# Axis labels, range, legend and tick sizes for the
# extractions-per-class figure.
fontsize = 32
axis_label_size = fontsize + 2
xlim(1, 100)
xlabel('Min. no. of extractions', fontsize=axis_label_size)
ylabel('Log. count of concepts', fontsize=axis_label_size)

# Per-system legend entries already attached to the axes.
handles, _labels = ax.get_legend_handles_labels()
# Two black proxy lines explain the solid / dashed line styles.
custom_artists = [
    plt.Line2D((0,1),(0,0), color='k', linestyle=dash_style)
    for dash_style in ('-', '--')
]
ax.legend(
    handles + custom_artists,
    _labels + ['All', 'Unique'],
    loc='upper right',
    prop={'size': fontsize},
)
for set_ticks in (xticks, yticks):
    set_ticks(fontsize=fontsize)
In [ ]:
# Write the current figure next to the other quantitative results, once as
# PDF and once as PNG, with identical rendering options.
# NOTE: 'extrations' (sic) is kept verbatim so the output path is unchanged.
fname = workspace+'/data/results/quantitative/'+keyword+'/extrations-per-class'
save_options = dict(
    dpi=200, facecolor='w', edgecolor='w', orientation='landscape',
    papertype=None, format=None, transparent=False,
    bbox_inches='tight', pad_inches=0.1,
)
for extension in ('.pdf', '.png'):
    savefig(fname + extension, **save_options)
In [ ]: