In [23]:
from lib import MongoBackend
from matplotlib import pyplot as plt
from matplotlib.pyplot import show
# Shared backend handle used by every plotting cell in this notebook.
# NOTE(review): the meaning of the 'master' argument (host? database name?) is
# not visible here — confirm against lib.MongoBackend.
backend = MongoBackend("master")

Centralized Preprocessing


In [27]:
# Query tuple reused by the plotting cells that follow: an empty first element
# and a second element selecting the 'lines' and 'time' fields.
# NOTE(review): presumably a MongoDB (filter, projection) pair — confirm
# against lib.MongoBackend.
query = (
    {},
    {'lines': 1, 'time': 1},
)

# Execution time of the centralized preprocessing step.
backend.plot_query(
    "imr_preprocess",
    query,
    title="Centralized Preprocess",
    ylabel='time (sec)',
    xlabel='#entries',
    label="exec. time"
)

Centralized W2V Train


In [29]:
# Execution time of centralized W2V training.
# '-x' is passed as the third positional argument (presumably a matplotlib
# format string: solid line with x markers — confirm in lib.MongoBackend).
backend.plot_query(
    "imr_w2v_train",
    query,
    '-x',
    title="W2V Centralized Train",
    ylabel='time (sec)',
    xlabel='#entries',
    label="exec. time"
)

# Alternative (disabled): fetch the rows and draw them directly with matplotlib.
# NOTE: subscripting the result of zip(...) only works on Python 2; on
# Python 3 it must be wrapped in list(...) first.
# res = backend.query("imr_w2v_train", query)
# res = zip(*res)[1:]
# plt.plot(res[0], res[1], alpha=0.3)
# plt.scatter(res[0], res[1], marker='D', s=50, color='red')

plt.show()

Centralized W2V Vectorize


In [30]:
# Execution time of centralized W2V vectorization.
backend.plot_query(
    "imr_w2v_vectorize",
    query,
    title="W2V Centralized Vectorization",
    ylabel='time (sec)',
    xlabel='# documents',
    label="exec. time"
)

Centralized Classifier Train


In [31]:
# Execution time of training the centralized classifier.
backend.plot_query(
    "imr_train",
    query,
    title="Train Centralized Classifier",
    ylabel='time (sec)',
    xlabel='# documents',
    label="exec. time"
)

Centralized Classification


In [ ]:
# Execution time of prediction with the centralized classifier.
backend.plot_query(
    "imr_predict",
    query,
    title="Predict with Centralized Classifier",
    ylabel='time (sec)',
    xlabel='# documents',
    label="exec. time"
)

Spark W2V Train


In [4]:
# Re-declare the query tuple so this cell does not depend on an earlier cell
# having been run; it selects the 'lines' and 'time' fields.
query = (
    {},
    {'lines': 1, 'time': 1},
)

# Execution time of Spark W2V training.
backend.plot_query(
    "imr_w2v_train_spark",
    query,
    title="Spark W2V Train",
    ylabel='time (sec)',
    xlabel='#entries',
    label="exec. time"
)

Spark W2V Vectorize


In [8]:
# Execution time of Spark W2V vectorization.
backend.plot_query(
    "imr_w2v_vectorize_spark",
    query,
    title="Spark W2V Vectorization",
    ylabel='time (sec)',
    xlabel='# documents',
    label="exec. time"
)

Spark Classifier Train


In [7]:
# Execution time of training the Spark classifier.
backend.plot_query(
    "imr_lr_train_spark",
    query,
    title="Train Spark Classifier",
    ylabel='time (sec)',
    xlabel='# documents',
    label="exec. time"
)

Spark Classification


In [10]:
# Execution time of prediction with the Spark classifier.
backend.plot_query(
    "imr_classify_spark",
    query,
    title="Predict with Spark Classifier",
    ylabel='time (sec)',
    xlabel='# documents',
    label="exec. time"
)

Comparative

Classifier train


In [25]:
# Centralized vs. Spark classifier-training times.
# The first call passes show_plot=False; presumably this defers rendering so
# the second call draws onto the same figure — TODO confirm in lib.MongoBackend.
backend.plot_query(
    "imr_train",
    query,
    title="Classifier Training",
    ylabel='time (sec)',
    xlabel='# documents',
    label="Centralized",
    show_plot=False
)
backend.plot_query(
    "imr_lr_train_spark",
    query,
    ylabel='time (sec)',
    xlabel='# documents',
    label="Spark"
)

Classify


In [26]:
# Centralized vs. Spark prediction times.
# The first call passes show_plot=False; presumably this defers rendering so
# the second call draws onto the same figure — TODO confirm in lib.MongoBackend.
backend.plot_query(
    "imr_predict",
    query,
    title="Predict using Model",
    ylabel='time (sec)',
    xlabel='# documents',
    label="Centralized",
    show_plot=False
)
backend.plot_query(
    "imr_classify_spark",
    query,
    ylabel='time (sec)',
    xlabel='# documents',
    label="Spark"
)

W2V train


In [27]:
# Re-declare the query tuple so this cell is self-contained; it selects the
# 'lines' and 'time' fields.
query = (
    {},
    {'lines': 1, 'time': 1},
)

# Centralized vs. Spark W2V training times.
# The first call passes show_plot=False; presumably this defers rendering so
# the second call draws onto the same figure — TODO confirm in lib.MongoBackend.
backend.plot_query(
    "imr_w2v_train",
    query,
    '-x',
    title="W2V Train",
    show_plot=False,
    ylabel='time (sec)',
    xlabel='#entries',
    label="Centralized"
)
backend.plot_query(
    "imr_w2v_train_spark",
    query,
    ylabel='time (sec)',
    xlabel='#entries',
    label="Spark"
)

W2V Vectorize


In [28]:
# Centralized vs. Spark W2V vectorization times.
# The first call passes show_plot=False; presumably this defers rendering so
# the second call draws onto the same figure — TODO confirm in lib.MongoBackend.
#
# Fix: the y-axis label of the first series was 'Centralized' (the series
# name, already given via `label`). Every other plot in this notebook labels
# the y axis 'time (sec)', so both calls now use it.
backend.plot_query(
    "imr_w2v_vectorize",
    query,
    title="W2V Vectorization",
    ylabel='time (sec)',
    xlabel='# documents',
    show_plot=False,
    label="Centralized"
)

backend.plot_query(
    "imr_w2v_vectorize_spark",
    query,
    ylabel='time (sec)',
    xlabel='# documents',
    label="Spark"
)