In [1]:
from os import environ

# Hyper-parameters are handed to utils.ipynb through environment variables,
# so every value must be a string ('layers' and 'dropouts' are space-separated
# lists parsed downstream).
hyperparams = {
    'optimizer': 'Adam',
    'num_workers': '2',
    'batch_size': '512',
    'n_epochs': '1000',
    'batch_norm': 'True',
    'loss_func': 'MAPE',
    'layers': '120 80 30',
    'dropouts': '0.2 0.3 0.3 0.3 0.3',
    'log': 'False',
    'weight_decay': '0.00',
    'cuda_device': 'cuda:1',
    'dataset': 'data/blur2d.pkl',
}
environ.update(hyperparams)
# Execute the shared utilities notebook; it reads the environment variables
# set above and defines dataset, device, model classes, criteria, etc.
%run utils.ipynb
In [2]:
# Split the dataset into train/validation/test DataLoaders
# (train_dev_split, dataset, batch_size, num_workers, log come from utils.ipynb).
train_dl, val_dl, test_dl = train_dev_split(dataset, batch_size, num_workers, log=log)
# Wrap the loaders in a fastai DataBunch on the configured device.
db = fai.basic_data.DataBunch(train_dl, val_dl, test_dl, device=device)
In [3]:
# Infer network input/output widths from the dataset tensors.
input_size = train_dl.dataset.X.shape[1]
output_size = train_dl.dataset.Y.shape[1]

# Build the model (both classes defined in utils.ipynb): Model_BN adds
# batch normalization and dropout, plain Model does not.
# (The redundant `model = None` initializer was removed — both branches assign.)
if batch_norm:
    model = Model_BN(input_size, output_size, hidden_sizes=layers_sizes, drops=drops)
else:
    model = Model(input_size, output_size)

# Training criterion: MSE when requested, otherwise MAPE.
criterion = nn.MSELoss() if loss_func == 'MSE' else mape_criterion

# fastai Learner with MAPE/RMSE metrics and early stopping on the MAPE
# metric (stop after 500 epochs without a 0.1 improvement).
l = fai.Learner(db, model, loss_func=criterion, metrics=[mape_criterion, rmse_criterion],
                callback_fns=[partial(EarlyStoppingCallback, mode='min',
                                      monitor='mape_criterion', min_delta=0.1, patience=500)])
# fastai defaults to Adam; switch to SGD when configured.
if optimizer == 'SGD':
    l.opt_func = optim.SGD
In [4]:
# Resume from the previously saved checkpoint (models/training_tuning.pth).
l = l.load(f"training_tuning")
In [10]:
# LR range test: sweep learning rates to locate a good value.
l.lr_find()
In [ ]:
# Plot loss vs. learning rate from the LR-finder run.
l.recorder.plot()
In [4]:
# Learning rate chosen from the LR-finder plot above.
lr = 1e-03
In [5]:
# Train for 100 epochs with the 1-cycle policy.
l.fit_one_cycle(100, lr)
In [ ]:
# Training/validation loss curves.
l.recorder.plot_losses()
In [9]:
# Persist the trained weights for later reuse.
l.save(f"training_tuning")
In [5]:
# Per-sample prediction/target DataFrames for each split
# (get_results_df is defined in utils.ipynb).
val_df = get_results_df(val_dl, l.model)
train_df = get_results_df(train_dl, l.model)
test_df = get_results_df(test_dl, l.model)
In [6]:
# Evaluate on validation + test samples together.
df = pd.concat([val_df, test_df])
In [8]:
# Summary statistics of predictions, targets, absolute error and
# absolute percentage error (APE).
df[:][['prediction','target', 'abs_diff','APE']].describe()
Out[8]:
In [8]:
df2 = df
# NOTE(review): the title says "Validation dataset" but df holds
# validation + test rows (see the concat above) — confirm which is intended.
joint_plot(df2, f"Validation dataset, {loss_func} loss")
In [56]:
# Accuracy thresholds (in %) and, for each one, how many predictions have
# an absolute percentage error within it (i.e. 100 - APE >= threshold).
bins = [10, 50, 80, 90]
counts = []
for thresh in bins:
    counts.append(df[(100 - df.APE) >= thresh].APE.count())
In [63]:
# Bar chart: number of schedules predicted within each precision threshold.
# Uses plt consistently instead of mixing the pylab interface with pyplot
# (plt was already in scope and used below; the mid-notebook `import pylab`
# is no longer needed).
plt.bar(range(len(bins)), counts, color='lightseagreen')
plt.xticks(ticks=range(len(bins)), labels=("> " + str(thresh) + "%" for thresh in bins))
for i in range(len(counts)):
    # NOTE(review): dividing the count by 100 to get a percentage assumes
    # exactly 10,000 samples — confirm, or derive it from len(df).
    plt.text(x=i - 0.3, y=counts[i] + 50,
             s=str(counts[i]) + ', ' + str(counts[i] / 100) + '%', size=45)
plt.xlabel('precision threshold', fontsize=30)
plt.yticks(fontsize=30)
plt.ylabel('scheduled programs', fontsize=30)
plt.xticks(fontsize=30)
Out[63]:
In [15]:
# Focus the following plots on the validation split only.
df = val_df
In [18]:
# Order samples by true speedup and add a rank column 'x' for the x-axis.
df_ = df.sort_values(by=["target"])
df_['x'] = range(len(df_))
In [32]:
# Large figure size for the scatter comparison below.
plt.rcParams['figure.figsize'] = [40, 30]
In [19]:
# Predicted vs. real speedup per sample, ordered by real speedup.
plt.plot('x', 'prediction', 'o',label='prediction', data=df_)
plt.plot('x', 'target', 'o', color='orange', label='real speedup', data=df_)
plt.ylabel('speedup', fontsize=30)
plt.yticks(fontsize=30)
plt.xlabel('scheduled programs', fontsize=30)
plt.xticks(fontsize=30)
plt.legend(prop={"size": 35}, title_fontsize=30)
Out[19]:
In [172]:
# NOTE(review): this concat is immediately overwritten by the next cell
# (df = val_df) — dead cell, apparently kept for ad-hoc switching of splits.
df = pd.concat([val_df,test_df], ignore_index=True)
In [6]:
df = val_df
In [7]:
# Keep only the columns needed for the top-n evaluation below.
df1 = df[:][['prediction','target', 'name']]
In [58]:
# Example: schedules of one program ranked by predicted speedup.
tmp_df = df1[df1.name == 'function382'].sort_values(by=["prediction"], ascending=False)
In [8]:
def evaluation_df(df1, n=1):
    """Top-n evaluation of predicted schedules, one row per program.

    For each program in ``df1`` (columns: 'name', 'prediction', 'target'),
    rank its schedules by predicted speedup, execute (conceptually) the top
    ``n`` of them, and report the best real speedup among those n.

    Parameters
    ----------
    df1 : pd.DataFrame
        Per-schedule results with 'name', 'prediction' and 'target' columns.
    n : int
        Number of top-predicted schedules considered per program.

    Returns
    -------
    pd.DataFrame with columns:
        performance      achieved_speedup / max_speedup
        achieved_speedup best real speedup among the top-n predicted
        max_speedup      best real speedup over all schedules of the program
        schedules_count  number of schedules for the program
        ranking          1-based rank of achieved_speedup among all targets

    Fixes vs. the original:
    - builds the result from a list of records instead of pd.concat in a
      loop (quadratic);
    - uses head(n) instead of iloc[list(range(n))], which raised IndexError
      for programs with fewer than n schedules;
    - iterates programs in sorted order for a deterministic row order;
    - drops the unused abs_diff local.
    """
    records = []
    for prog in sorted(set(df1.name)):
        # Schedules of this program, best-predicted first.
        prog_df = df1[df1.name == prog].sort_values(by=["prediction"], ascending=False)
        best_target = prog_df.target.max()
        # Best real speedup among the top-n predicted schedules.
        speedup = prog_df.head(n).target.max()
        speedups = sorted(prog_df.target, reverse=True)
        records.append({
            'performance': speedup / best_target,
            'achieved_speedup': speedup,
            'max_speedup': best_target,
            'schedules_count': len(prog_df.target),
            'ranking': speedups.index(speedup) + 1,
        })
    return pd.DataFrame(records, columns=['performance', 'achieved_speedup',
                                          'max_speedup', 'schedules_count', 'ranking'])
In [21]:
# Top-1 evaluation: performance when executing only the best-predicted schedule.
eval_df = evaluation_df(df1, n=1)
In [26]:
eval_df.performance.mean()
Out[26]:
In [25]:
eval_df
Out[25]:
In [11]:
# Top-3 evaluation.
eval_df = evaluation_df(df1, n=3)
eval_df
Out[11]:
In [12]:
# Top-5 evaluation.
eval_df = evaluation_df(df1, n=5)
eval_df
Out[12]:
In [10]:
# NOTE(review): this cell passes df (all columns) instead of df1 —
# confirm whether that is intentional.
eval_df = evaluation_df(df, n=12)
eval_df
Out[10]:
In [37]:
# Mean performance (%) as a function of n, for n = 1..25.
perfs=[]
for n in range(1, 26):
    tmp = evaluation_df(df1, n=n)
    perfs.append(tmp.performance.mean()*100)
In [40]:
# Average achieved performance vs. number of executed top-n schedules.
# Fix: perfs[i] was computed for n = i + 1 (the loop above runs over
# range(1, 26)), so plot against 1..len(perfs) — the original x-axis
# started at 0 and was shifted by one.
plt.plot(range(1, len(perfs) + 1), perfs, 'c-', linewidth=10)
plt.ylabel('average performance (%)', fontsize=30)
plt.yticks(fontsize=30)
plt.xlabel('n', fontsize=30)
plt.xticks(fontsize=30)
Out[40]:
In [ ]: