In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import json

In [17]:
# Load the benchmark results. The context manager closes the file handle as
# soon as parsing finishes instead of leaving it open until garbage collection.
with open("/work/eng/eliavb/all_distance_sketch/result.json") as results_file:
    results = json.load(results_file)

In [18]:
# Turn the parsed JSON into a table. The plain constructor is what
# DataFrame.from_dict delegates to for its default column orientation.
df = pd.DataFrame(data=results)

In [19]:
# Sanity-check the load: preview the first rows before any normalisation.
df.head()


Out[19]:
data_set num_threads time
0 /users/eng/eliavb/data/facebook 1 1037
1 /users/eng/eliavb/data/facebook 2 928
2 /users/eng/eliavb/data/facebook 3 708
3 /users/eng/eliavb/data/facebook 4 658
4 /users/eng/eliavb/data/facebook 5 606

In [20]:
# Normalise every timing by the single-thread timing of the same data set, so
# that afterwards time == 1.0 at num_threads == 1 for each data set.
# NOTE: this rewrites df["time"] in place; later cells see the normalised values.

# Cast up front: the ratios written below are floats, and newer pandas no longer
# silently upcasts an integer column when floats are assigned into it.
df["time"] = df["time"].astype(float)

data_sets = list(set(df.data_set.values))
for data_set_ in data_sets:
    # Row mask for the current data set, computed once and reused below.
    is_current = df.data_set == data_set_
    df_ = df[is_current]
    # Baseline: the first row recorded with exactly one thread.
    single_thread_time = df_[df_.num_threads == 1].time.values[0]
    # %-formatting prints the same "<path> <baseline>" line on Python 2 and 3.
    print("%s %s" % (data_set_, single_thread_time))
    # .loc replaces .ix, which pandas deprecated in 0.20 and removed in 1.0.
    df.loc[is_current, "time"] = df_["time"] / single_thread_time


/users/eng/eliavb/data/live_journal 4486329
/users/eng/eliavb/data/facebook 1037.0
/users/eng/eliavb/data/tweeter 287774.0
/users/eng/eliavb/data/youtube 466319.0
/users/eng/eliavb/data/slashdot 20035.0

In [7]:
# Preview again: once the normalisation cell has run, time holds each run's
# duration relative to the single-thread run of the same data set.
df.head()


Out[7]:
data_set num_threads time
0 /users/eng/eliavb/data/facebook 1 1.000000
1 /users/eng/eliavb/data/facebook 2 0.894889
2 /users/eng/eliavb/data/facebook 3 0.682739
3 /users/eng/eliavb/data/facebook 4 0.634523
4 /users/eng/eliavb/data/facebook 5 0.584378

In [8]:
def get_name(data_set):
    """Return the human-readable legend label for a data-set path.

    The path is matched by substring against the known data-set directory
    names, in the order listed below; the first match wins.

    Args:
        data_set: data-set path string, e.g. "/users/eng/eliavb/data/facebook".

    Returns:
        The display name of the matched data set. When nothing matches, the
        last path component is returned, so the caller always receives a
        usable string label instead of an implicit None.
    """
    # (substring to look for, label to show). The directory on disk is spelled
    # "tweeter"; the label uses the proper name.
    known_names = (
        ("facebook", "Facebook"),
        ("live_journal", "Live journal"),
        ("tweeter", "Twitter"),
        ("youtube", "YouTube"),
        ("slashdot", "Slashdot"),
    )
    for key, display_name in known_names:
        if key in data_set:
            return display_name
    # Unknown data set: fall back to the final path component.
    return data_set.rstrip("/").rsplit("/", 1)[-1]

In [15]:
# Plot the per-data-set timing curve against the number of threads, one line
# per data set. Expects df["time"] to already be divided by the single-thread
# time (the y-range below is fixed to [0, 1]).

# sorted() keeps the line, colour and legend order stable between runs; plain
# set iteration order is not guaranteed.
data_sets = sorted(set(df.data_set.values))
# Derive the x-range from the data instead of hard-coding 14 threads.
max_threads = df.num_threads.max()
labels = []
plotHandles = []
plt.figure(figsize=(12,7))
for data_set_ in data_sets:
    df_ = df[df.data_set == data_set_]
    # Use the recorded thread counts as x so each point sits at its own
    # num_threads, even when a data set has fewer (or differently ordered)
    # rows than the others.
    x, = plt.plot(df_.num_threads.values, df_.time.values)
    plotHandles.append(x)
    labels.append(get_name(data_set_))
plt.axis([1, max_threads, 0, 1])
plt.xlabel('#threads', fontsize="large")
# NOTE(review): the plotted value is time / single-thread time, i.e. relative
# running time (lower is better), not speedup in the usual T1/Tn sense.
# Label kept as-is; confirm the intended wording.
plt.ylabel('Speedup', fontsize="large")
# loc is passed by keyword: newer matplotlib deprecates passing anything other
# than handles and labels positionally.
plt.legend(plotHandles, labels, loc='upper right', ncol=1)


Out[15]:
<matplotlib.legend.Legend at 0x7fe722b780d0>

In [55]:
# Show every row recorded for the Twitter ("tweeter") data set.
df.loc[df["data_set"] == "/users/eng/eliavb/data/tweeter"]


Out[55]:
data_set num_threads time
42 /users/eng/eliavb/data/tweeter 1 1.000000
43 /users/eng/eliavb/data/tweeter 2 0.632128
44 /users/eng/eliavb/data/tweeter 3 0.476516
45 /users/eng/eliavb/data/tweeter 4 0.421223
46 /users/eng/eliavb/data/tweeter 5 0.371444
47 /users/eng/eliavb/data/tweeter 6 0.326065
48 /users/eng/eliavb/data/tweeter 7 0.324004
49 /users/eng/eliavb/data/tweeter 8 0.293216
50 /users/eng/eliavb/data/tweeter 9 0.310059
51 /users/eng/eliavb/data/tweeter 10 0.270153
52 /users/eng/eliavb/data/tweeter 11 0.257723
53 /users/eng/eliavb/data/tweeter 12 0.260882
54 /users/eng/eliavb/data/tweeter 13 0.250346
55 /users/eng/eliavb/data/tweeter 14 0.240943

In [ ]: