In [1]:
%pylab inline
In [2]:
import pandas
import seaborn
import src
import os
In [3]:
# Method-level projects loaded without a num_topics setting; the final figure
# cell iterates over this collection to draw one panel per project.
projects = src.main.load_projects({"level": "method"})
def read_dataframe(kind, topic_counts=range(25, 525, 25)):
    """Collect the MRR of every project for each topic-count setting.

    For each value in ``topic_counts`` the method-level projects are reloaded
    with that ``num_topics``. For every project, the ranks of the requested
    ``kind`` are read, the triples produced by ``src.main.get_frms`` are
    filtered to those whose second element appears in the project's
    ``ids.txt``, and the first elements of the survivors are passed to
    ``src.utils.calculate_mrr``.

    Parameters
    ----------
    kind : str
        Rank kind forwarded to ``src.main.read_ranks``
        (e.g. ``"release_lda"``).
    topic_counts : iterable of int, optional
        Values of ``num_topics`` to sweep. Defaults to 25, 50, ..., 500.

    Returns
    -------
    pandas.DataFrame
        One column per topic count and one row per project, labelled
        ``"<printable_name> <version>"``; cells hold the MRR value.
    """
    mrr_by_topics = dict()
    for num_topics in topic_counts:
        config = {"level": "method", "num_topics": num_topics}
        mrr_by_project = dict()
        for project in src.main.load_projects(config):
            ranks = src.main.read_ranks(project, kind)
            with open(os.path.join(project.full_path, 'ids.txt')) as f:
                # A set keeps the membership filter below constant-time per
                # lookup instead of scanning a list for every rank entry.
                ids = set(line.strip() for line in f)
            frms = [x for x, y, z in src.main.get_frms(ranks) if y in ids]
            label = project.printable_name + " " + project.version
            mrr_by_project[label] = src.utils.calculate_mrr(frms)
        mrr_by_topics[num_topics] = mrr_by_project
    return pandas.DataFrame(mrr_by_topics)
In [4]:
# One MRR table per rank kind (rows: projects, columns: topic counts).
lda, vec, vec_sums = [
    read_dataframe(rank_kind)
    for rank_kind in ("release_lda", "release_vec", "release_vec_sums")
]
In [5]:
# One line per project: MRR of the "release_lda" ranks as the topic count grows.
ax = lda.T.plot(title="LDA: MRR per project")
ax.set_xlabel("number of topics")
ax.set_ylabel("MRR")
ax
Out[5]:
In [6]:
# Show the "release_lda" MRR table (rows: project + version, columns: topic count).
lda
Out[6]:
In [7]:
# One line per project: MRR of the "release_vec" ranks as the topic count grows.
ax = vec.T.plot(title="DV Inf.: MRR per project")
ax.set_xlabel("number of topics")
ax.set_ylabel("MRR")
ax
Out[7]:
In [8]:
# Show the "release_vec" MRR table (rows: project + version, columns: topic count).
vec
Out[8]:
In [9]:
# One line per project: MRR of the "release_vec_sums" ranks as the topic count grows.
ax = vec_sums.T.plot(title="DV Sum.: MRR per project")
ax.set_xlabel("number of topics")
ax.set_ylabel("MRR")
ax
Out[9]:
In [10]:
# Show the "release_vec_sums" MRR table (rows: project + version, columns: topic count).
vec_sums
Out[10]:
In [11]:
# Column-wise mean of each MRR table, i.e. the average over projects for every
# topic count, gathered into one frame with a column per model.
df = pandas.DataFrame({
    label: table.mean()
    for label, table in (("lda", lda), ("vec", vec), ("vec_sums", vec_sums))
})
df
Out[11]:
In [12]:
# One line per model: MRR averaged over projects for each topic count.
ax = df.plot(title="Mean MRR across projects")
ax.set_xlabel("number of topics")
ax.set_ylabel("mean MRR")
ax
Out[12]:
In [13]:
# Write each MRR table to its LaTeX file, formatting floats with four decimals.
def _four_decimals(x):
    return "%.4f" % x

for tex_path, mrr_table in (
    ('icsme15era/tables/lda_mrr.tex', lda),
    ('icsme15era/tables/vec_mrr.tex', vec),
    ('icsme15era/tables/vec_sums_mrr.tex', vec_sums),
):
    with open(tex_path, 'w') as f:
        mrr_table.to_latex(f, float_format=_four_decimals)
In [14]:
# Preview the LaTeX source of the lda table (floats rendered with four decimals).
lda.to_latex(float_format=lambda x: "%.4f" % x)
Out[14]:
In [15]:
# Timing log with its start/finish columns converted to pandas Timestamps,
# plus the subject-size table read as-is.
times = pandas.read_csv(
    'times100.csv',
    converters={"start": pandas.Timestamp, "finish": pandas.Timestamp},
)
info = pandas.read_csv('subjectsizes.csv')
In [16]:
# Show the table read from subjectsizes.csv.
info
Out[16]:
In [17]:
# Keep only the elapsed duration of each run; the raw stamps are dropped.
times["total"] = times["finish"] - times["start"]
times = times.drop(["start", "finish"], axis=1)
In [18]:
# Rows whose type is "train"; the filter column itself is removed afterwards.
training = times[times["type"] == "train"].drop("type", axis=1)
training
Out[18]:
In [19]:
# Print the LaTeX table of the training rows whose model is "lda".
print(training[training.model == "lda"].to_latex())
In [20]:
# vec_sums timing rows joined with the subject sizes on "system", then the
# elapsed time of each row divided by that system's query count.
vstimes = times[times["model"] == "vec_sums"].merge(info, on="system")
vstimes["total"] / vstimes["queries"]
Out[20]:
In [21]:
# vec timing rows joined with the subject sizes on "system", then the elapsed
# time of each row divided by that system's query count.
vectimes = times[times["model"] == "vec"].merge(info, on="system")
vectimes["total"] / vectimes["queries"]
Out[21]:
In [22]:
# Sum the vec timing rows per system, discard the summed label columns, and
# attach each system's summed query count from the subject-size table.
vectimes = (
    times[times["model"] == "vec"]
    .groupby(["system"])
    .sum()
    .drop(["model", "type"], axis=1)
)
vectimes['queries'] = info.groupby("system").sum()["queries"]
In [23]:
# Summed elapsed time divided by the query count, one value per row of vectimes.
vectimes.total / vectimes.queries
Out[23]:
In [24]:
# Sum the lda timing rows per system, discard the summed label columns, attach
# each system's summed query count, and show the elapsed time per query.
ldatimes = (
    times[times["model"] == "lda"]
    .groupby(["system"])
    .sum()
    .drop(["model", "type"], axis=1)
)
ldatimes['queries'] = info.groupby("system").sum()["queries"]
ldatimes["total"] / ldatimes["queries"]
Out[24]:
In [26]:
# Stacked figure with one panel per project, each comparing the three MRR
# tables across topic counts; saved to icsme15era/figures/mrr_graph.pdf.
project_list = list(projects)
# squeeze=False + ravel keeps `axes` a 1-D array whatever the project count.
fig, axes = plt.subplots(nrows=len(project_list), figsize=(3,5), squeeze=False)
axes = axes.ravel()
plt.subplots_adjust(hspace=0.05)
for i, project in enumerate(project_list):
    n = project.printable_name + " " + project.version
    # `.loc` is the label-based replacement for the removed `.ix` indexer.
    # A dedicated name is used so the summary frame `df` is not overwritten.
    panel = pandas.DataFrame({"LDA": lda.loc[n], "DV Inf.": vec.loc[n], "DV Sum.": vec_sums.loc[n]})
    xaxis = axes[i].get_xaxis()
    yaxis = axes[i].get_yaxis()
    # Only the top panel carries x tick labels; the others are blanked.
    if i != 0:
        xaxis.set_ticklabels(['','','','',''])
    else:
        xaxis.set_ticklabels(['', 100, 200, 300, 400,''])
    yaxis.set_ticklabels(['',''])
    yaxis.tick_right()
    xaxis.tick_top()
    panel.plot(subplots=False,
               ax=axes[i],
               yticks=[0, 0.15],
               legend=False)
# A single shared legend placed above the first panel.
axes[0].legend(loc='upper center', bbox_to_anchor=(0.5, 2),
               fancybox=True, shadow=True, ncol=3, columnspacing=1, handlelength=1, handletextpad=0.5)
plt.savefig("icsme15era/figures/mrr_graph.pdf", bbox_inches='tight')
In [ ]: