In [1]:
from os import environ

environ['optimizer'] = 'Adam'
environ['num_workers']= '2'
environ['batch_size']= str(512)
environ['n_epochs']= '1000'
environ['batch_norm']= 'True'
environ['loss_func']='MAPE'
environ['layers'] = '120 80 30'
environ['dropouts'] = '0.2'+' 0.3'*4
environ['log'] = 'False'
environ['weight_decay'] = '0.00'
environ['cuda_device'] ='cuda:1'
environ['dataset'] = 'data/blur2d.pkl'

%run utils.ipynb

In [2]:
train_dl, val_dl, test_dl = train_dev_split(dataset, batch_size, num_workers, log=log)

db = fai.basic_data.DataBunch(train_dl, val_dl, test_dl, device=device)

In [3]:
input_size = train_dl.dataset.X.shape[1]
output_size = train_dl.dataset.Y.shape[1]


model = None 

if batch_norm:
    model = Model_BN(input_size, output_size, hidden_sizes=layers_sizes, drops=drops)
else:
    model = Model(input_size, output_size)
    
if loss_func == 'MSE':
    criterion = nn.MSELoss()
else:
    criterion = mape_criterion

l = fai.Learner(db, model, loss_func=criterion, metrics=[mape_criterion, rmse_criterion],
               callback_fns=[partial(EarlyStoppingCallback, mode='min', 
                                        monitor='mape_criterion', min_delta=0.1, patience=500)])

if optimizer == 'SGD':
    l.opt_func = optim.SGD

In [4]:
l = l.load(f"training_tuning")

In [10]:
l.lr_find()


---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
<ipython-input-10-d3b7136227cf> in <module>
----> 1 l.lr_find()

~/anaconda3/lib/python3.6/site-packages/fastai/train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, **kwargs)
     28     end_lr = np.array(end_lr) if is_listy(end_lr) else end_lr
     29     cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
---> 30     a = int(np.ceil(num_it/len(learn.data.train_dl)))
     31     learn.fit(a, start_lr, callbacks=[cb], **kwargs)
     32 

ZeroDivisionError: division by zero

In [ ]:
l.recorder.plot()

In [4]:
lr = 1e-03

In [5]:
l.fit_one_cycle(100, lr)


0.00% [0/100 00:00<00:00]
epoch train_loss valid_loss mape_criterion rmse_criterion
100% [0/0]
/home/mohammed/anaconda3/lib/python3.6/site-packages/fastprogress/fastprogress.py:96: UserWarning: Your generator is empty.
  warn("Your generator is empty.")
/home/mohammed/anaconda3/lib/python3.6/site-packages/fastprogress/fastprogress.py:96: UserWarning: Your generator is empty.
  warn("Your generator is empty.")
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-5-e55a1bbe4f4a> in <module>
----> 1 l.fit_one_cycle(100, lr)

~/anaconda3/lib/python3.6/site-packages/fastai/train.py in fit_one_cycle(learn, cyc_len, max_lr, moms, div_factor, pct_start, wd, callbacks, **kwargs)
     19     callbacks.append(OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor,
     20                                         pct_start=pct_start, **kwargs))
---> 21     learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks)
     22 
     23 def lr_find(learn:Learner, start_lr:Floats=1e-7, end_lr:Floats=10, num_it:int=100, stop_div:bool=True, **kwargs:Any):

~/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
    164         callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
    165         fit(epochs, self.model, self.loss_func, opt=self.opt, data=self.data, metrics=self.metrics,
--> 166             callbacks=self.callbacks+callbacks)
    167 
    168     def create_opt(self, lr:Floats, wd:Floats=0.)->None:

~/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
     92     except Exception as e:
     93         exception = e
---> 94         raise e
     95     finally: cb_handler.on_train_end(exception)
     96 

~/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
     87             if hasattr(data,'valid_dl') and data.valid_dl is not None and data.valid_ds is not None:
     88                 val_loss = validate(model, data.valid_dl, loss_func=loss_func,
---> 89                                        cb_handler=cb_handler, pbar=pbar)
     90             else: val_loss=None
     91             if cb_handler.on_epoch_end(val_loss): break

~/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in validate(model, dl, loss_func, cb_handler, pbar, average, n_batch)
     55             if n_batch and (len(nums)>=n_batch): break
     56         nums = np.array(nums, dtype=np.float32)
---> 57         if average: return (to_np(torch.stack(val_losses)) * nums).sum() / nums.sum()
     58         else:       return val_losses
     59 

RuntimeError: expected a non-empty list of Tensors

In [ ]:
l.recorder.plot_losses()

In [9]:
l.save(f"training_tuning")

In [5]:
val_df = get_results_df(val_dl, l.model)
train_df = get_results_df(train_dl, l.model)
test_df = get_results_df(test_dl, l.model)

In [6]:
df = pd.concat([val_df, test_df])

In [8]:
df[:][['prediction','target', 'abs_diff','APE']].describe()


Out[8]:
prediction target abs_diff APE
count 10000.000000 10000.000000 10000.000000 10000.000000
mean 0.413576 0.469633 0.121196 27.027849
std 0.472750 0.748854 0.377212 32.709480
min 0.014930 0.011766 0.000011 0.004518
25% 0.124545 0.119905 0.014898 8.726650
50% 0.297695 0.299996 0.046940 18.616159
75% 0.548960 0.609846 0.114020 32.135881
max 4.243530 11.273418 7.439478 426.500885

In [8]:
df2 = df
joint_plot(df2, f"Validation dataset, {loss_func} loss")


/data/scratch/henni-mohammed/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval

In [56]:
bins = [10, 50,  80, 90]

counts = [df[(100-df.APE) >= thresh].APE.count() for thresh in bins]

In [63]:
import pylab

pylab.bar(range(len(bins)), counts, color='lightseagreen')
pylab.xticks(ticks=range(len(bins)), labels=("> "+ str(thresh)+"%" for thresh in bins))

for i in range(len(counts)):
    plt.text(x = i-0.3 , y = counts[i]+50, 
             s = str(counts[i]) + ', ' + str(counts[i]/100) + '%', size=45)

pylab.xlabel('precision threshold', fontsize=30)
plt.yticks(fontsize=30)
plt.ylabel('scheduled programs', fontsize=30)
plt.xticks(fontsize=30)


Out[63]:
(array([0, 1, 2, 3]), <a list of 4 Text xticklabel objects>)

In [15]:
df = val_df

In [18]:
df_ = df.sort_values(by=["target"])

df_['x'] = range(len(df_))

In [32]:
plt.rcParams['figure.figsize'] = [40, 30]

In [19]:
plt.plot('x', 'prediction', 'o',label='prediction',  data=df_)

plt.plot('x', 'target',  'o', color='orange', label='real speedup', data=df_)


plt.ylabel('speedup', fontsize=30)
plt.yticks(fontsize=30)
plt.xlabel('scheduled programs', fontsize=30)
plt.xticks(fontsize=30)
plt.legend(prop={"size": 35}, title_fontsize=30)


Out[19]:
<matplotlib.legend.Legend at 0x7f014c099b00>

In [172]:
df = pd.concat([val_df,test_df], ignore_index=True)

In [6]:
df = val_df

In [7]:
df1 = df[:][['prediction','target', 'name']]

In [58]:
tmp_df = df1[df1.name == 'function382'].sort_values(by=["prediction"], ascending=False)

In [8]:
def evaluation_df(df1, n=1):
    eval_df = pd.DataFrame(columns=['performance', 'achieved_speedup', 'max_speedup', 'schedules_count', 'ranking'])

    for prog in set(df1.name):
        tmp_df = df1[df1.name == prog].sort_values(by=["prediction"], ascending=False)
        
        speedup = tmp_df.iloc[list(range(n))].target.max()

        perf = speedup / tmp_df.target.max()
        abs_diff =  tmp_df.target.max() - speedup

        
        speedups = sorted(tmp_df.target, reverse=True)
        
        rank = speedups.index(speedup) + 1
        

        new = pd.DataFrame()
        new['achieved_speedup'] = [speedup]
        new['performance'] = [perf]
        new['max_speedup'] = [tmp_df.target.max()]
        new['schedules_count'] = [len(tmp_df.target)]
        new['ranking'] = rank
 
        eval_df = pd.concat([eval_df, new],
                   ignore_index=True)         
    
    
   
    return eval_df

In [21]:
eval_df = evaluation_df(df1, n=1)


/data/scratch/henni-mohammed/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:26: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.


In [26]:
eval_df.performance.mean()


Out[26]:
0.8388058332835927

In [25]:
eval_df


Out[25]:
achieved_speedup max_speedup performance ranking schedules_count
0 7.211255 8.414969 0.856956 4 80
1 0.896169 1.000000 0.896169 35 532
2 0.927371 1.116073 0.830923 41 812
3 4.251027 7.242253 0.586976 5 80
4 1.373537 1.497731 0.917079 5 736
5 1.000000 1.060186 0.943231 11 1388
6 0.985381 1.347270 0.731391 86 736
7 1.006172 1.532539 0.656539 282 1384
8 0.864287 1.095596 0.788874 22 520
9 1.019130 1.170114 0.870967 26 736
10 1.000000 1.122290 0.891036 13 1100
11 0.911495 1.095437 0.832084 69 736
12 9.833364 11.273418 0.872261 14 56
13 0.950039 1.124060 0.845186 32 340
14 1.021583 1.021583 1.000000 1 532
15 0.987670 1.132885 0.871818 12 152
16 3.147563 3.625341 0.868212 11 80

In [11]:
eval_df = evaluation_df(df1, n=3)
eval_df


/data/scratch/henni-mohammed/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:26: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

Out[11]:
achieved_speedup max_speedup performance ranking schedules_count
0 7.211255 8.414969 0.856956 4 80
1 0.921409 1.000000 0.921409 16 532
2 1.000000 1.116073 0.895998 17 812
3 4.251027 7.242253 0.586976 5 80
4 1.373537 1.497731 0.917079 5 736
5 1.000000 1.060186 0.943231 11 1388
6 0.985381 1.347270 0.731391 86 736
7 1.280553 1.532539 0.835576 42 1384
8 0.882452 1.095596 0.805453 18 520
9 1.068108 1.170114 0.912824 7 736
10 1.001548 1.122290 0.892415 10 1100
11 1.034025 1.095437 0.943939 8 736
12 11.166584 11.273418 0.990523 2 56
13 0.950039 1.124060 0.845186 32 340
14 1.021583 1.021583 1.000000 1 532
15 1.132885 1.132885 1.000000 1 152
16 3.147563 3.625341 0.868212 11 80

In [12]:
eval_df = evaluation_df(df1, n=5)
eval_df


/data/scratch/henni-mohammed/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:26: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

Out[12]:
achieved_speedup max_speedup performance ranking schedules_count
0 8.414969 8.414969 1.000000 1 80
1 0.921409 1.000000 0.921409 16 532
2 1.086396 1.116073 0.973409 3 812
3 4.251027 7.242253 0.586976 5 80
4 1.373537 1.497731 0.917079 5 736
5 1.000000 1.060186 0.943231 11 1388
6 0.985381 1.347270 0.731391 86 736
7 1.280553 1.532539 0.835576 42 1384
8 0.882452 1.095596 0.805453 18 520
9 1.118755 1.170114 0.956108 3 736
10 1.001548 1.122290 0.892415 10 1100
11 1.034025 1.095437 0.943939 8 736
12 11.166584 11.273418 0.990523 2 56
13 0.966923 1.124060 0.860206 31 340
14 1.021583 1.021583 1.000000 1 532
15 1.132885 1.132885 1.000000 1 152
16 3.290193 3.625341 0.907554 5 80

In [10]:
eval_df = evaluation_df(df, n=12)
eval_df


/data/scratch/henni-mohammed/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:26: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

Out[10]:
achieved_speedup max_speedup performance ranking schedules_count
0 1.373537 1.497731 0.917079 5 736
1 1.054888 1.347270 0.782982 50 736
2 1.132885 1.132885 1.000000 1 152
3 7.242253 7.242253 1.000000 1 80
4 1.000000 1.124060 0.889632 24 340
5 0.915675 1.095596 0.835778 16 520
6 1.324832 1.532539 0.864469 28 1384
7 11.273418 11.273418 1.000000 1 56
8 8.414969 8.414969 1.000000 1 80
9 3.445319 3.625341 0.950343 2 80
10 1.106763 1.122290 0.986165 2 1100
11 1.038723 1.095437 0.948227 7 736
12 1.060186 1.060186 1.000000 1 1388
13 1.170114 1.170114 1.000000 1 736
14 1.000000 1.000000 1.000000 1 532
15 1.021583 1.021583 1.000000 1 532
16 1.086396 1.116073 0.973409 3 812

In [37]:
perfs=[]
for n in range(1, 26):
    tmp = evaluation_df(df1, n=n)
    perfs.append(tmp.performance.mean()*100)


/data/scratch/henni-mohammed/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:26: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.


In [40]:
plt.plot(range(len(perfs)), perfs, 'c-', linewidth=10)

plt.ylabel('average performance (%)', fontsize=30)
plt.yticks(fontsize=30)
plt.xlabel('n', fontsize=30)
plt.xticks(fontsize=30)


Out[40]:
(array([-5.,  0.,  5., 10., 15., 20., 25., 30.]),
 <a list of 8 Text xticklabel objects>)

In [ ]: