In [1]:
# Populate the interactive namespace from numpy and matplotlib and render figures inline.
# Later cells use `plt`, `np` and `figure()` unqualified; presumably those names come from here.
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [2]:
import sys; sys.path.append('/Volumes/ExtendedHD/Users/stevo/git/lt.core/lt.ltbot/src/main/scripts/')
from plot_perplexity import *

In [3]:
def readtuples(fn):
    """Lazily read a tab-separated perplexity file and yield plotting tuples.

    Every non-blank line must hold at least three tab-separated fields, in
    this order: a leading field (ignored here), the document URL, and the
    perplexity value. Anything after the second tab is kept verbatim as the
    perplexity field.

    Parameters
    ----------
    fn : str
        Path of the file to read.

    Yields
    ------
    tuple
        ``(i, pp, '', url)`` -- the four-entry form
        ``(timestamp, perplexity, text, url)`` that plot_perplexity expects.
        ``i`` is a running index standing in for the timestamp: the first
        line of the file gets 2, the next 3, and so on (blank lines still
        consume an index). ``pp`` is the perplexity exactly as read, i.e. a
        string; the text entry is always the empty string.

    Raises
    ------
    ValueError
        If a non-blank line has fewer than three tab-separated fields.
    """
    with open(fn, 'r') as fh:
        # start=2 reproduces the numbering of the former manual counter, which
        # was initialised to 1 and incremented before the first yield.
        for i, line in enumerate(fh, start=2):
            record = line.strip()
            if not record:
                # Blank lines (e.g. a trailing empty line) carry no data;
                # skip them instead of failing on the unpack below.
                continue
            _lm, url, pp = record.split('\t', 2)
            yield (i, pp, '', url)

In [3]:


In [11]:
# Histograms of per-document perplexity for the 2015-03-02 evaluation files.
# readtuples is a generator function: nothing is read from disk until a plotting call
# iterates the returned object, and each object can be iterated only once.
# NOTE(review): f / nf and woov / noov presumably distinguish the evaluation variants
# (e.g. filtered / non-filtered, with / without OOV) -- confirm against the eval setup.
tuples_f_woov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_f_woov.txt')
tuples_f_noov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_f_noov.txt')
tuples_nf_woov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_nf_woov.txt')
tuples_nf_noov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_nf_noov.txt')

# Histogram for the f_woov file; the returned counts and bin edges (n_f, bins_f) are
# printed in a later cell.
# NOTE(review): log=True is combined with a lower y-limit of 0 below; a log-scaled axis
# cannot display 0, so confirm the limit has the intended effect.
n_f, bins_f, _ = plot_perp_hist(tuples_f_woov, count=10000, bins=100, range=(0,10000000), log=True);
plt.ylim((0,1e6))
# Disabled: same histogram for the f_noov file over a narrower range.
# figure()
# n, bins, _ = plot_perp_hist(tuples_f_noov, bins=100, range=(0,100000));

# Start a new figure so the nf_woov histogram does not draw into the previous one.
figure()
n_nf, bins_nf, _ = plot_perp_hist(tuples_nf_woov, count=10000, bins=100, range=(0,10000000), log=True);
plt.ylim((0,1e6))
# Disabled: same histogram for the nf_noov file over a narrower range.
# figure()
# n, bins, _ = plot_perp_hist(tuples_nf_noov, bins=100, range=(0,100000));


{'log': True, 'range': (0, 10000000), 'bins': 100}
{'log': True, 'range': (0, 10000000), 'bins': 100}

In [12]:
# Sanity check on the two histograms: first the total number of counted
# documents per histogram, then the counts in the ten lowest bins.
for counts in (n_f, n_nf):
    print(counts.sum())
for counts in (n_f, n_nf):
    print(counts[:10])


9937.0
7678.0
[ 8365.   533.   262.   124.    94.    67.    46.    41.    35.    30.]
[ 2486.   339.   195.   180.   166.   167.   170.   129.   145.   131.]

In [58]:
# Create fresh generators for the four evaluation files. readtuples yields lazily and a
# generator can be iterated only once, so objects already handed to a plotting call
# cannot be reused (presumably why they are re-created here).
tuples_f_woov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_f_woov.txt')
tuples_f_noov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_f_noov.txt')
tuples_nf_woov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_nf_woov.txt')
tuples_nf_noov = readtuples('/Volumes/ExtendedHD/Users/stevo/Workspaces/lm/eval/2015-03-02/docperp_nf_noov.txt')

# Perplexity over "time" for the f_woov file. The timestamp entry of each tuple is the
# running line index produced by readtuples, so the x axis is the position in the file.
# NOTE(review): the meaning of the individual keyword arguments is defined by
# plot_perplexity.plot_time_vs_perp, which is not visible here -- check there before tuning.
figure(figsize=(15,5))
plot_time_vs_perp(tuples_f_woov, 
                  binsize=1, 
                  xlimits=(0,1000), 
                  polydeg=0, 
                  avgfun=np.median, 
                  ylimits=(0,1e5), 
                  xtickstep=100, 
                  avg_line_binsize=10,
                  limit_avg=False, 
                  plot_dots=True, 
                  show_total_average=False);
# Disabled: switch the y axis of the current axes to a log scale.
#gca().set_yscale('log')


plotting perplexity values as a function over time.
total number of datapoints collected: 222046; binsize: 1; number of new (averaged) datapoints: 222046; limits: (0, 1000); number of averaged datapoints showing: 1000;

In [11]:


In [ ]: