In [ ]:
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
from plotly.graph_objs import *
import numpy as np

# Enable plotly's offline notebook mode so the iplot() calls in later cells can render inline.
init_notebook_mode()

In [ ]:
def get_result(path):
    """Load one timings file and summarise it.

    The file ``'timings/' + path`` is read as one number per line and every
    value is divided by 1E9. (The plots label the result as seconds, so the
    raw values are presumably nanoseconds -- confirm against the benchmark
    that wrote the files.)

    Blank or whitespace-only lines are skipped, so a trailing empty line does
    not abort the load.

    Returns:
        A tuple ``(first, later_mean)``: the first scaled value, and the
        ``np.mean`` of all remaining scaled values. With a single measurement
        the mean of the empty remainder is ``nan``.

    Raises:
        IndexError: if the file contains no measurements.
        ValueError: if a non-blank line is not a valid number.
    """
    with open('timings/' + path) as f:
        # float() tolerates the trailing newline; only truly blank lines need filtering.
        results = [float(line) / 1E9 for line in f if line.strip()]
    return results[0], np.mean(results[1:])
def get_results(size):
    """Load the Hadoop and Spark logistic-regression timings for one size.

    `size` is formatted with ``%d`` into the directory name under ``timings/``.

    Returns:
        A tuple ``(hadoop, spark)`` where each element is the
        ``(first, later_mean)`` pair produced by `get_result`.
    """
    hadoop_timing = get_result('%d/hadooplr.txt' % size)
    spark_timing = get_result('%d/sparklr.txt' % size)
    return hadoop_timing, spark_timing

# Sizes to load: each one names a sub-directory of timings/ and is later used
# as the x value of the bar charts.
x = [25, 100]
# results[k] is the (hadoop, spark) pair for x[k]; each of those is a
# (first, later_mean) tuple as returned by get_result.
results = [get_results(size) for size in x]
# Function-call form prints the same text on Python 2 and is valid on Python 3.
print(results)

In [ ]:
# Mean time of the iterations after the first, per size, one bar series per
# framework. Each entry of `results` is (hadoop, spark), so index 0 selects
# Hadoop and index 1 selects Spark; [1] then picks the later-iterations mean.
# The series are emitted Spark first, then Hadoop.
# The y values are built as real lists: on Python 3 map() returns a lazy
# iterator rather than a list.
data = [
    Bar(x=x, y=[r[idx][1] for r in results], name=label)
    for idx, label in ((1, 'Spark'), (0, 'Hadoop'))
]
layout = Layout(xaxis=dict(title='Number of machines'), yaxis=dict(title='Iteration time after first (s)'))
iplot(Figure(data=data, layout=layout))

In [ ]:
# Same measurement as the previous chart, restricted to Spark:
# r[1] is the Spark pair for a size and r[1][1] its later-iterations mean.
# A list comprehension is used so y is a real list on Python 3 as well.
data = [Bar(x=x, y=[r[1][1] for r in results], name='Spark')]
layout = Layout(xaxis=dict(title='Number of machines'), yaxis=dict(title='Iteration time after first (s)'))
iplot(Figure(data=data, layout=layout))

In [ ]:
# First-iteration time versus later-iterations mean for the last size in `x`.
# results[-1] is the (hadoop, spark) pair for that size; index 0 of each
# element is the first iteration and index 1 the mean of the remaining ones.
# List comprehensions keep y a real list on Python 3, where map() is lazy.
data = [
    Bar(x=['Hadoop', 'Spark'], y=[timing[idx] for timing in results[-1]], name=label)
    for idx, label in ((0, 'First iteration'), (1, 'Later iterations'))
]
layout = Layout(xaxis=dict(title='Logistic Regression'), yaxis=dict(title='Iteration time (s)'))
iplot(Figure(data=data, layout=layout))