In [2]:
from lib import MongoBackend
from matplotlib import pyplot as plt
from matplotlib.pyplot import show
# Single backend handle shared by every plotting cell in this notebook.
# NOTE(review): 'master' is presumably the MongoDB host name -- confirm in lib.MongoBackend.
backend = MongoBackend("master")

Mahout TF-IDF


In [3]:
# Mahout TF-IDF timings: one plot_query call per cluster size, each with
# show_plot=False, followed by a single show() once the loop has finished.
for node_count in (2, 5, 7, 9):
    # presumably a (filter, projection) pair for the Mongo query -- confirm
    # against MongoBackend.plot_query
    selection = ({'slaves': node_count}, {'documents': 1, 'time': 1})
    backend.plot_query(
        "mahout_tfidf_multi",
        selection,
        title="Mahout TF-IDF (minDF=10)",
        ylabel='time (sec)',
        xlabel='# documents',
        show_plot=False,
        label="%d nodes" % node_count,
    )
show()


/usr/local/lib/python2.7/dist-packages/matplotlib/axes/_axes.py:475: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.
  warnings.warn("No labelled objects found. "

Mahout KMeans


In [1]:
# Mahout KMeans timings: one plot_query call per cluster size, each with
# show_plot=False, followed by a single show() once the loop has finished.
#
# Requires the setup cell (imports + `backend = MongoBackend(...)`) to have run
# first; executing this cell on a fresh kernel raises
# NameError: name 'backend' is not defined.
for slaves in (2, 5, 7, 9):
    # presumably a (filter, projection) pair for the Mongo query -- confirm
    # against MongoBackend.plot_query
    query = ({'slaves': slaves}, {'documents': 1, 'time': 1})
    # Fixed copy-paste bug: this cell used to read "mahout_tfidf_multi", which
    # would re-plot the TF-IDF timings under a KMeans title.
    # NOTE(review): the collection name follows the spark_tfidf_multi /
    # spark_kmeans_multi naming used elsewhere in this notebook -- confirm it exists.
    backend.plot_query(
        "mahout_kmeans_multi",
        query,
        title="Mahout KMeans (K=20)",
        ylabel='time (sec)',
        xlabel='# documents',
        show_plot=False,
        label="%d nodes" % slaves,
    )
show()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-5b0bf594de20> in <module>()
      1 for slaves in 2,5,7,9:
      2     query = ({'slaves':slaves}, {'documents':1, 'time':1})
----> 3     backend.plot_query("mahout_tfidf_multi", query, title="Mahout KMeans (K=20)",                        ylabel='time (sec)', xlabel='# documents', show_plot=False, label="%d nodes"%slaves)
      4 show()

NameError: name 'backend' is not defined

In [21]:
## Spark TF-IDF

In [22]:
# Spark TF-IDF timings: one plot_query call per cluster size, each with
# show_plot=False, followed by a single show() once the loop has finished.
for node_count in (2, 5, 7, 9):
    # presumably a (filter, projection) pair for the Mongo query -- confirm
    # against MongoBackend.plot_query
    selection = ({'slaves': node_count}, {'documents': 1, 'time': 1})
    backend.plot_query(
        "spark_tfidf_multi",
        selection,
        title="Spark TF-IDF (minDF=10)",
        ylabel='time (sec)',
        xlabel='# documents',
        show_plot=False,
        label="%d nodes" % node_count,
    )
show()

In [24]:
## Spark KMeans

In [26]:
# Spark K-Means timings: one plot_query call per cluster size, each with
# show_plot=False, followed by a single show() once the loop has finished.
#
# NOTE(review): the title says "minDF=10", which is a TF-IDF parameter; the
# Mahout KMeans cell is titled "(K=20)". This looks copy-pasted from the
# TF-IDF cell -- confirm the real K used for the Spark run and fix the title.
for node_count in (2, 5, 7, 9):
    # presumably a (filter, projection) pair for the Mongo query -- confirm
    # against MongoBackend.plot_query
    selection = ({'slaves': node_count}, {'documents': 1, 'time': 1})
    backend.plot_query(
        "spark_kmeans_multi",
        selection,
        title="Spark K-Means (minDF=10)",
        ylabel='time (sec)',
        xlabel='# documents',
        show_plot=False,
        label="%d nodes" % node_count,
    )
show()

In [ ]: