In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [20, 20]
import numpy as np
import dill as pickle
from src.data.dataset import *
from src.data import loop_ast
import pandas as pd
import seaborn as sns
In [2]:
# List the contents of the local `data` directory (shell-style `ls` run through IPython).
ls data
In [ ]:
In [4]:
# Call DatasetFromPkl.pickle_data with the source path '../blur_h/' and the target path 'data/blur_h.pkl'.
# NOTE(review): presumably this serializes what is found under the first path into
# the pickle file named by the second — confirm against the DatasetFromPkl definition.
DatasetFromPkl.pickle_data('../blur_h/', 'data/blur_h.pkl')
In [1]:
In [5]:
# Build the dataset object `D` from the pickle at 'data/speedup_dataset3.pkl'.
# NOTE(review): maxsize=None presumably means "do not cap the number of samples" — confirm.
D = DatasetFromPkl('data/speedup_dataset3.pkl', maxsize=None)
In [12]:
Out[12]:
In [5]:
# Flatten the dataset into one table: the program index, the value from D.Y
# (stored as `speedup`) and the binary representation of the schedule at each
# position 0 .. len(D) - 1.
binary_schedules = [D.schedules[i].binary_repr for i in range(len(D))]

df = pd.DataFrame()
df['program'] = D.program_indexes
df['speedup'] = D.Y
df['schedule'] = binary_schedules
In [7]:
# Distinct schedule values present in the table.
schedules = set(df.schedule)
In [21]:
# Speedup values recorded for program 110 under schedule (1, 0, 0).
is_target_schedule = df.schedule == (1, 0, 0)
is_target_program = df.program == 110
df.loc[is_target_schedule & is_target_program, 'speedup']
Out[21]:
In [23]:
# For every schedule, collect one speedup Series per program that has rows with
# that schedule. `scores` ends up as a list (one entry per schedule) of lists
# (one Series per program).
scores = []
for sch in schedules:
    uses_schedule = df.schedule == sch
    programs = set(df[uses_schedule].program)
    # Disabled alternative: mean over programs of (speedup / best speedup of that program).
    #score = np.mean([float(df[(df.schedule==sch) & (df.program == p)].speedup)/max(df[df.program==p].speedup) for p in programs])
    score = [df.loc[uses_schedule & (df.program == p), 'speedup'] for p in programs]
    scores.append(score)
In [36]:
# Mean relative speedup per schedule: every row's speedup is divided by the
# largest speedup seen for the same program, and the resulting ratios are
# averaged per schedule.
programs = set(df.program)
schedules = set(df.schedule)

# Start with an empty list of ratios for every schedule.
scores = {schedule: [] for schedule in schedules}
for p in programs:
    tmp_df = df[df.program == p]
    best = max(tmp_df.speedup)
    for row_schedule, row_speedup in zip(tmp_df.schedule, tmp_df.speedup):
        scores[row_schedule].append(row_speedup / best)

# Collapse each list of ratios into its mean.
scores = {schedule: np.mean(ratios) for schedule, ratios in scores.items()}
In [60]:
# Display the current value of `scores`.
scores
Out[60]:
In [ ]:
In [64]:
# Preview the first rows of `df`.
df.head()
Out[64]:
In [57]:
# Display the current value of `scores`.
scores
In [ ]:
# Plot one loss curve per learning rate, with the row index as the x axis.
#
# NOTE(review): `losses` is not defined in any visible cell. This assumes it is
# an indexable collection whose entries 0..3 are equally long per-epoch loss
# sequences for the learning rates listed below, in that order — confirm.
#
# Fixes over the previous version of this cell:
#   * `pd.DataFramerame()` was a typo for `pd.DataFrame()` (AttributeError).
#   * The frame gets its own name (`loss_df`) so the program/speedup/schedule
#     table `df`, which other cells filter by `df.program`, is not overwritten.
line_colors = {'0.1': 'blue', '0.01': 'orange', '0.001': 'green', '0.0001': 'red'}

loss_df = pd.DataFrame()
for position, learning_rate in enumerate(line_colors):
    loss_df[learning_rate] = losses[position]
loss_df['x'] = loss_df.index

# Each line is labelled by matplotlib with the name of its y column, so the
# legend shows the learning rates.
for learning_rate, color in line_colors.items():
    plt.plot('x', learning_rate, data=loss_df, color=color)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
In [123]:
# Build `df1`: for every program and every base schedule below, pair the mean
# speedup of the base schedule with the mean speedup of the same schedule with
# its third flag set to 1 (called "unrolled" here — presumably the unrolling
# bit of the binary schedule representation; confirm). Only programs that have
# rows for BOTH variants contribute a pair. Pairs are sorted by the
# not-unrolled speedup so the curves can be plotted against the row index.
programs = set(df.program)
schedules = [(0, 1, 0), (1, 0, 0), (1, 1, 0)]

pairs = []  # (unrolled mean speedup, not-unrolled mean speedup)
for p in programs:
    tmp_df = df[df.program == p]
    program_schedules = set(tmp_df.schedule)
    for sch in schedules:
        unrolled_sch = (sch[0], sch[1], 1)
        if sch in program_schedules and unrolled_sch in program_schedules:
            not_unrolled_mean = tmp_df[tmp_df.schedule == sch].speedup.mean()
            unrolled_mean = tmp_df[tmp_df.schedule == unrolled_sch].speedup.mean()
            pairs.append((unrolled_mean, not_unrolled_mean))

# Stable sort on the not-unrolled value. Working on the list of pairs (rather
# than unpacking `zip(*sorted(...))`) also handles the case where no pair
# qualified, which previously raised ValueError.
pairs.sort(key=lambda pair: pair[1])
unrolled = tuple(pair[0] for pair in pairs)
not_unrolled = tuple(pair[1] for pair in pairs)

df1 = pd.DataFrame(pairs, columns=['unrolled', 'not_unrolled'])
df1['x'] = df1.index
In [124]:
# Store df1's index in column `x` (used as the x axis when plotting).
df1['x'] = df1.index
In [2]:
# Draw the `unrolled` and `not_unrolled` columns of df1 against column `x`.
fig, ax = plt.subplots()
ax.plot('x', 'unrolled', data=df1, color='blue')
ax.plot('x', 'not_unrolled', data=df1, label='not unrolled', color='orange')
ax.set_xlabel('scheduled program')
ax.set_ylabel('speedup')
ax.legend()
In [111]:
# Line plot of df1 in wide form (seaborn draws one line per column against the index).
# The trailing `(y + H*x)` that used to follow this call was removed: it called
# the returned Axes object with `y`, `H` and `x`, none of which is defined in
# any visible cell, so the cell could not run.
sns.lineplot(data=df1)
Out[111]:
In [ ]: