In [2]:
from lib import MongoBackend
from matplotlib import pyplot as plt
from matplotlib.pyplot import show
# Shared backend handle; every plotting cell below calls backend.plot_query on it.
# NOTE(review): 'master' is presumably the host name of the MongoDB server
# holding the benchmark results -- confirm against lib.MongoBackend.
backend = MongoBackend('master')
In [3]:
# Mahout TF-IDF: add one curve per cluster size to the figure, then display it.
for slaves in (2, 5, 7, 9):
    # NOTE(review): the pair is presumably (filter, projection) in MongoDB
    # find() style -- confirm against MongoBackend.plot_query.
    selector = {'slaves': slaves}
    fields = {'documents': 1, 'time': 1}
    query = (selector, fields)
    backend.plot_query(
        "mahout_tfidf_multi",
        query,
        title="Mahout TF-IDF (minDF=10)",
        xlabel='# documents',
        ylabel='time (sec)',
        label="%d nodes" % slaves,
        show_plot=False
    )
# show_plot=False is passed on every call above; the figure is displayed once here.
show()
In [1]:
# Mahout K-Means: add one curve per cluster size to the figure, then display it.
for slaves in (2, 5, 7, 9):
    # NOTE(review): the pair is presumably (filter, projection) in MongoDB
    # find() style -- confirm against MongoBackend.plot_query.
    query = ({'slaves': slaves}, {'documents': 1, 'time': 1})
    # Fixed: this cell used to query "mahout_tfidf_multi", which replotted the
    # TF-IDF timings under a K-Means title.
    # NOTE(review): "mahout_kmeans_multi" follows the spark_tfidf_multi /
    # spark_kmeans_multi naming used elsewhere in this notebook -- confirm the
    # collection exists under this name.
    backend.plot_query(
        "mahout_kmeans_multi",
        query,
        title="Mahout KMeans (K=20)",
        ylabel='time (sec)',
        xlabel='# documents',
        show_plot=False,
        label="%d nodes" % slaves
    )
# show_plot=False is passed on every call above; the figure is displayed once here.
show()
In [21]:
## Spark TF-IDF
In [22]:
# Spark TF-IDF: add one curve per cluster size to the figure, then display it.
for slaves in (2, 5, 7, 9):
    # NOTE(review): the pair is presumably (filter, projection) in MongoDB
    # find() style -- confirm against MongoBackend.plot_query.
    selector = {'slaves': slaves}
    fields = {'documents': 1, 'time': 1}
    query = (selector, fields)
    backend.plot_query(
        "spark_tfidf_multi",
        query,
        title="Spark TF-IDF (minDF=10)",
        xlabel='# documents',
        ylabel='time (sec)',
        label="%d nodes" % slaves,
        show_plot=False
    )
# show_plot=False is passed on every call above; the figure is displayed once here.
show()
In [24]:
## Spark KMeans
In [26]:
# Spark K-Means: add one curve per cluster size to the figure, then display it.
for slaves in (2, 5, 7, 9):
    # NOTE(review): the pair is presumably (filter, projection) in MongoDB
    # find() style -- confirm against MongoBackend.plot_query.
    selector = {'slaves': slaves}
    fields = {'documents': 1, 'time': 1}
    query = (selector, fields)
    # NOTE(review): the title says "minDF=10" while the Mahout K-Means plot in
    # this notebook is titled "K=20" -- confirm which parameter is intended.
    backend.plot_query(
        "spark_kmeans_multi",
        query,
        title="Spark K-Means (minDF=10)",
        xlabel='# documents',
        ylabel='time (sec)',
        label="%d nodes" % slaves,
        show_plot=False
    )
# show_plot=False is passed on every call above; the figure is displayed once here.
show()
In [ ]: