In [1]:
%pylab inline
In [2]:
import sys; sys.path.append('/Volumes/ExtendedHD/Users/stevo/git/lt.core/lt.ltbot/src/main/scripts/')
from plot_perplexity import *
In [3]:
def readtuples(fn):
    """Lazily read a tab-separated perplexity file and yield plot-ready tuples.

    Every non-blank line of ``fn`` is split on its first two tab characters
    into three fields. The first field (called ``lm`` in the original code,
    presumably a language-model identifier) is ignored; the second is the url
    and the third is the perplexity. Blank or whitespace-only lines, such as
    a trailing empty line at the end of the file, are skipped instead of
    aborting the whole read.

    Args:
        fn: path of the file to read.

    Yields:
        ``(i, pp, '', url)`` -- the four-entry form
        ``(timestamp, perplexity, text, url)`` that plot_perplexity expects.
        ``i`` is the 1-based line number plus one (the first line yields 2),
        ``pp`` is the perplexity exactly as written in the file (a string),
        and the text slot is always the empty string.

    Raises:
        ValueError: if a non-blank line has fewer than three tab-separated
            fields.
    """
    with open(fn, 'r') as fh:
        # start=2 reproduces the numbering of the original hand-written
        # counter, which was initialised to 1 and incremented before each
        # yield. Skipped blank lines still consume a number, so ``i`` keeps
        # tracking the physical line position.
        for i, line in enumerate(fh, start=2):
            record = line.strip()
            if not record:
                # Nothing to unpack on an empty line; skipping it avoids a
                # ValueError from the three-way unpack below.
                continue
            _lm, url, pp = record.split('\t', 2)
            yield (i, pp, '', url)
In [3]:
In [11]:
# Create one reader per evaluation file. readtuples is a generator function, so
# none of these files is opened until something iterates the generator.
# NOTE(review): the two *_noov readers are only referenced by the commented-out
# calls further down, so in this cell they are never consumed.
# NOTE(review): the paths are absolute and machine-specific.
tuples_f_woov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_f_woov.txt')
tuples_f_noov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_f_noov.txt')
tuples_nf_woov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_nf_woov.txt')
tuples_nf_noov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_nf_noov.txt')
# Perplexity histogram for the f/woov file. plot_perp_hist comes from the
# plot_perplexity star import; its three return values presumably mirror
# matplotlib's hist() (counts, bin edges, patches) -- TODO confirm. The third
# value is discarded; n_f is inspected in the following cell.
n_f, bins_f, _ = plot_perp_hist(tuples_f_woov, count=10000, bins=100, range=(0,10000000), log=True);
# NOTE(review): if log=True puts the y axis on a log scale, a lower limit of 0
# cannot be represented on it -- confirm how matplotlib treats this call.
plt.ylim((0,1e6))
# Disabled variant: histogram of the f/noov file on a narrower range.
# figure()
# n, bins, _ = plot_perp_hist(tuples_f_noov, bins=100, range=(0,100000));
# Open a fresh figure (figure() is presumably the pylab function made available
# by %pylab inline) so the nf histogram is not drawn on top of the f one.
figure()
# Same histogram settings as above, applied to the nf/woov file.
n_nf, bins_nf, _ = plot_perp_hist(tuples_nf_woov, count=10000, bins=100, range=(0,10000000), log=True);
plt.ylim((0,1e6))
# Disabled variant: histogram of the nf/noov file on a narrower range.
# figure()
# n, bins, _ = plot_perp_hist(tuples_nf_noov, bins=100, range=(0,100000));
In [12]:
# n_f and n_nf are the first values returned by plot_perp_hist for the f and nf
# files (presumably per-bin counts). Print both totals first, then the first
# ten entries of each, always in f-then-nf order.
for hist_counts in (n_f, n_nf):
    print(hist_counts.sum())
for hist_counts in (n_f, n_nf):
    print(hist_counts[:10])
In [58]:
# Re-create the readers: a generator can be iterated only once, so the objects
# built in the histogram cell may already be (partly) consumed.
# NOTE(review): only tuples_f_woov is used below; the other three readers are
# never iterated in this cell.
tuples_f_woov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_f_woov.txt')
tuples_f_noov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_f_noov.txt')
tuples_nf_woov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_nf_woov.txt')
tuples_nf_noov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_nf_noov.txt')
# Wide figure for the perplexity-over-time plot.
figure(figsize=(15,5))
# plot_time_vs_perp comes from the plot_perplexity star import, so the exact
# meaning of each keyword is not visible in this notebook. The "time" value of
# every tuple is the line counter produced by readtuples, not a real timestamp.
# NOTE(review): avgfun=np.median presumably selects the median as the per-bin
# aggregate, and xlimits/ylimits presumably set the axis ranges -- confirm
# against plot_perplexity.
plot_time_vs_perp(tuples_f_woov,
binsize=1,
xlimits=(0,1000),
polydeg=0,
avgfun=np.median,
ylimits=(0,1e5),
xtickstep=100,
avg_line_binsize=10,
limit_avg=False,
plot_dots=True,
show_total_average=False);
# Disabled: switch the y axis of the current axes to a log scale.
#gca().set_yscale('log')
In [11]:
In [ ]: