Hyperparameter initialization


In [1]:
from os import environ

# Hyperparameters are passed to utils.ipynb through environment variables.
environ['optimizer'] = 'Adam'
environ['num_workers'] = '2'
environ['batch_size'] = str(512)
environ['batch_norm'] = 'True'
environ['loss_func'] = 'MAPE'
environ['layers'] = '500 300 120 80 30'   # hidden layer sizes
environ['dropouts'] = '0.3 ' * 5          # one dropout rate per hidden layer
environ['log'] = 'False'                  # log transform disabled
environ['weight_decay'] = '0.01'
environ['cuda_device'] = 'cuda:1'
environ['dataset'] = 'data/conv.pkl'

%run utils.ipynb
plt.rcParams['figure.figsize'] = [40, 30]
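utils.ipynb reads these environment variables back into Python values. The exact parsing happens there, so the cell below is only a sketch of the assumed conversion; names such as layers_sizes, drops and device are inferred from how they are used later in this notebook.

In [ ]:
# Hypothetical parsing of the variables set above (the real code lives in utils.ipynb).
from os import environ
import torch

batch_size   = int(environ['batch_size'])
num_workers  = int(environ['num_workers'])
weight_decay = float(environ['weight_decay'])
log          = environ['log'] == 'True'
layers_sizes = [int(s) for s in environ['layers'].split()]      # [500, 300, 120, 80, 30]
drops        = [float(s) for s in environ['dropouts'].split()]  # [0.3, 0.3, 0.3, 0.3, 0.3]
dataset      = environ['dataset']
device       = torch.device(environ['cuda_device'] if torch.cuda.is_available() else 'cpu')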

Loading the data


In [2]:
ds = DatasetFromPkl(dataset, maxsize=None, log=log)

dl = DataLoader(ds, batch_size=batch_size,
                sampler=SubsetRandomSampler(range(len(ds))),
                num_workers=num_workers)

# The same DataLoader is used for both training and validation,
# so the valid_loss reported below is measured on the training data.
db = fai.basic_data.DataBunch(dl, dl, device=device)

train_dl = dl
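DatasetFromPkl is defined in utils.ipynb. Assuming the pickle simply stores a feature matrix and a target matrix, a minimal re-implementation could look like the sketch below (the X/Y attribute names are taken from the next cell; everything else is an assumption):

In [ ]:
import pickle
import torch
from torch.utils.data import Dataset

class DatasetFromPklSketch(Dataset):
    """Sketch of DatasetFromPkl, for illustration only."""
    def __init__(self, path, maxsize=None, log=False):
        with open(path, 'rb') as f:
            X, Y = pickle.load(f)                      # assumed pickle layout
        if maxsize is not None:
            X, Y = X[:maxsize], Y[:maxsize]
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.Y = torch.as_tensor(Y, dtype=torch.float32)
        if log:                                        # optional log transform of the targets
            self.Y = torch.log(self.Y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]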

Creating the model


In [3]:
input_size = train_dl.dataset.X.shape[1]    # number of input features
output_size = train_dl.dataset.Y.shape[1]   # number of regression targets

model = Model_BN(input_size, output_size, hidden_sizes=layers_sizes, drops=drops)

criterion = mape_criterion

l = fai.Learner(db, model, loss_func=criterion)
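mape_criterion is also defined in utils.ipynb. Since loss_func is set to 'MAPE' above, a straightforward version would be the mean absolute percentage error, which would make the train/valid losses reported below read directly as percentages. This is only a sketch, not the actual definition:

In [ ]:
import torch

def mape_criterion_sketch(pred, target, eps=1e-8):
    """Mean absolute percentage error, in percent (illustrative version)."""
    return 100.0 * torch.mean(torch.abs((target - pred) / (target + eps)))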

Model architecture

Feedforward network. Equations of a hidden layer $l$:

$$Z_l = W_l A_{l-1} + b_l$$
$$A_l = \mathrm{ReLU}(Z_l)$$
$$A_l = \mathrm{BatchNorm}_{\alpha,\beta}(A_l)$$

where: $$\mathrm{ReLU}(x) = \max(0, x)$$
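A minimal module matching these equations, with one Linear / ReLU / BatchNorm / Dropout block per hidden size, might look as follows; the actual Model_BN is defined in utils.ipynb, so this is only a sketch:

In [ ]:
import torch.nn as nn

class Model_BN_Sketch(nn.Module):
    """Illustrative equivalent of Model_BN."""
    def __init__(self, input_size, output_size, hidden_sizes, drops):
        super().__init__()
        blocks, prev = [], input_size
        for h, p in zip(hidden_sizes, drops):
            blocks += [nn.Linear(prev, h),   # Z_l = W_l A_{l-1} + b_l
                       nn.ReLU(),            # A_l = ReLU(Z_l)
                       nn.BatchNorm1d(h),    # A_l = BatchNorm(A_l)
                       nn.Dropout(p)]
            prev = h
        self.hidden = nn.Sequential(*blocks)
        self.out = nn.Linear(prev, output_size)

    def forward(self, x):
        return self.out(self.hidden(x))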

Training the model

Learning rate $$\alpha = 0.001$$ Number of epochs = 50


In [4]:
alpha = 1e-03
epochs = 50

In [5]:
l.fit_one_cycle(epochs, alpha)


Total time: 00:06

epoch train_loss valid_loss
1 102.136902 36.319344
2 95.399834 90.673561
3 95.601974 120.309677
4 94.530807 122.710693
5 94.068588 108.351311
6 94.731300 89.999863
7 94.989784 73.612503
8 94.931061 55.571342
9 94.775948 46.092339
10 95.678490 47.011852
11 94.988365 52.748470
12 95.677818 59.715439
13 96.149330 65.869965
14 95.282616 66.561928
15 95.231407 74.659256
16 94.573418 77.812202
17 94.425964 79.909836
18 94.370071 81.907143
19 94.038322 90.442436
20 94.168243 99.366852
21 94.212456 99.996910
22 93.869476 99.996910
23 93.374069 99.996910
24 92.960373 99.996902
25 92.783279 99.996902
26 92.719833 99.996887
27 92.616486 99.996910
28 92.848145 99.996887
29 92.606606 97.752266
30 92.321373 87.491920
31 91.974571 75.334389
32 91.812790 69.563858
33 91.496567 67.985573
34 91.401436 67.564590
35 90.932724 68.609123
36 90.822716 68.753227
37 90.503227 71.630951
38 90.784142 81.138191
39 90.572693 87.732185
40 90.567139 89.224472
41 90.335022 90.892471
42 90.309708 90.823761
43 90.286461 91.517250
44 90.248314 91.358170
45 90.118774 90.675117
46 89.816093 88.862595
47 89.587036 87.159164
48 89.438080 86.754250
49 89.319214 86.060349
50 89.314484 85.649551


In [6]:
l.recorder.plot_losses()



In [7]:
l.recorder.plot_lr()
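plot_lr shows the learning-rate schedule applied by fit_one_cycle: the rate climbs from a fraction of alpha up to alpha, then anneals back down. The helper below is only a rough illustration of that one-cycle shape, not fastai's exact schedule:

In [ ]:
import math

def one_cycle_lr_sketch(step, total_steps, lr_max=1e-3, pct_start=0.3, div=25.0):
    """Rough one-cycle shape: cosine ramp from lr_max/div up to lr_max,
    then cosine decay back down (illustration only)."""
    warm = int(pct_start * total_steps)
    if step < warm:
        t = step / max(1, warm)
        return lr_max / div + (lr_max - lr_max / div) * (1 - math.cos(math.pi * t)) / 2
    t = (step - warm) / max(1, total_steps - warm)
    return lr_max * (1 + math.cos(math.pi * t)) / 2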



In [8]:
l = l.load("training_tuning")

In [9]:
train_df = get_results_df(train_dl, l.model)
df = train_df[1:]          # skip the first row (index 0)
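get_results_df comes from utils.ipynb. Judging by the columns used below (index, prediction and, in the convolution section, target), a simple version would evaluate the model over the dataloader and collect one row per sample; this sketch assumes a single regression target per sample:

In [ ]:
import pandas as pd
import torch

def get_results_df_sketch(dataloader, model, device='cpu'):
    """Illustrative re-implementation: one row per sample with prediction and target."""
    model.eval()
    rows = []
    with torch.no_grad():
        for x, y in dataloader:
            pred = model(x.to(device)).cpu()
            for p, t in zip(pred, y):
                rows.append({'prediction': p.item(), 'target': t.item()})
    return pd.DataFrame(rows).reset_index()    # adds the 'index' column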

In [11]:
df[['index', 'prediction']].sort_values(by='prediction', ascending=False)


Out[11]:
index prediction
58 58 0.580466
54 54 0.552432
77 77 0.523198
44 44 0.514327
31 31 0.470662
14 14 0.459485
3 3 0.457847
88 88 0.456971
28 28 0.454145
33 33 0.435479
66 66 0.429161
27 27 0.428684
57 57 0.426915
36 36 0.415167
32 32 0.411845
55 55 0.407105
45 45 0.406414
37 37 0.403094
30 30 0.402889
40 40 0.399338
110 110 0.393791
41 41 0.387817
2 2 0.386533
29 29 0.385452
99 99 0.380164
26 26 0.376292
1 1 0.376145
56 56 0.369614
35 35 0.368872
25 25 0.364135
... ... ...
67 67 0.126719
69 69 0.125647
72 72 0.123660
73 73 0.123094
76 76 0.122970
65 65 0.119446
78 78 0.117134
81 81 0.117131
75 75 0.115514
82 82 0.113485
85 85 0.112707
71 71 0.112498
74 74 0.108428
86 86 0.108027
80 80 0.107547
90 90 0.105361
95 95 0.105032
91 91 0.104608
98 98 0.103850
94 94 0.103147
70 70 0.102378
84 84 0.100961
79 79 0.097342
89 89 0.091621
93 93 0.087628
83 83 0.085667
97 97 0.084371
87 87 0.083552
92 92 0.078015
96 96 0.075144

120 rows × 2 columns


In [12]:
ds.schedules[58].schedule_list


Out[12]:
[{'type': 'interchange', 'params': [0, 5], 'factors': None},
 {'type': 'tiling', 'params': [-1, -1, -1], 'factors': [-1, -1, -1]},
 {'type': 'unrolling', 'params': None, 'factors': [16]}]

Real speedup of schedule 7: s = 1.29


Real speedup of schedule 19: s = 1.73

Convolution


In [26]:
ds.programs[0].dict_repr


Out[26]:
{'seed': 10243,
 'type': 5,
 'loops': {'n': 3,
  'loops_array': [{'loop_id': 0,
    'parent': -1,
    'position': 0,
    'loop_it': 0,
    'assignments': {'n': 0, 'assignments_array': []}},
   {'loop_id': 1,
    'parent': 0,
    'position': 0,
    'loop_it': 5,
    'assignments': {'n': 0, 'assignments_array': []}},
   {'loop_id': 2,
    'parent': 1,
    'position': 0,
    'loop_it': 6,
    'assignments': {'n': 1,
     'assignments_array': [{'id': 0, 'position': 0}]}}]},
 'computations': {'n': 1,
  'computations_array': [{'comp_id': 0,
    'lhs_data_type': 'p_int32',
    'loop_iterators_ids': [0, 5, 6],
    'operations_histogram': [[0, 0, 0, 0],
     [0, 0, 0, 0],
     [0, 0, 0, 0],
     [0, 0, 0, 0],
     [0, 0, 0, 0],
     [0, 0, 0, 0],
     [0, 0, 0, 0]],
    'rhs_accesses': {'n': 18,
     'accesses': [{'comp_id': 1, 'access': [[0, 1, 0, 0], [0, 0, 1, 0]]},
      {'comp_id': 2, 'access': [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]]},
      {'comp_id': 1, 'access': [[0, 1, 0, 0], [0, 0, 1, 1]]},
      {'comp_id': 2, 'access': [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1]]},
      {'comp_id': 1, 'access': [[0, 1, 0, 0], [0, 0, 1, 2]]},
      {'comp_id': 2, 'access': [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 2]]},
      {'comp_id': 1, 'access': [[0, 1, 0, 1], [0, 0, 1, 0]]},
      {'comp_id': 2, 'access': [[1, 0, 0, 0], [0, 1, 0, 1], [0, 0, 1, 0]]},
      {'comp_id': 1, 'access': [[0, 1, 0, 1], [0, 0, 1, 1]]},
      {'comp_id': 2, 'access': [[1, 0, 0, 0], [0, 1, 0, 1], [0, 0, 1, 1]]},
      {'comp_id': 1, 'access': [[0, 1, 0, 1], [0, 0, 1, 2]]},
      {'comp_id': 2, 'access': [[1, 0, 0, 0], [0, 1, 0, 1], [0, 0, 1, 2]]},
      {'comp_id': 1, 'access': [[0, 1, 0, 2], [0, 0, 1, 0]]},
      {'comp_id': 2, 'access': [[1, 0, 0, 0], [0, 1, 0, 2], [0, 0, 1, 0]]},
      {'comp_id': 1, 'access': [[0, 1, 0, 2], [0, 0, 1, 1]]},
      {'comp_id': 2, 'access': [[1, 0, 0, 0], [0, 1, 0, 2], [0, 0, 1, 1]]},
      {'comp_id': 1, 'access': [[0, 1, 0, 2], [0, 0, 1, 2]]},
      {'comp_id': 2,
       'access': [[1, 0, 0, 0], [0, 1, 0, 2], [0, 0, 1, 2]]}]}}]},
 'inputs': {'n': 2,
  'inputs_array': [{'input_id': 1,
    'data_type': 'p_int32',
    'loop_iterators_ids': [1, 2]},
   {'input_id': 2, 'data_type': 'p_int32', 'loop_iterators_ids': [0, 3, 4]}]},
 'iterators': {'n': 7,
  'iterators_array': [{'it_id': 0, 'lower_bound': 0, 'upper_bound': 3},
   {'it_id': 1, 'lower_bound': 0, 'upper_bound': 1024},
   {'it_id': 2, 'lower_bound': 0, 'upper_bound': 1024},
   {'it_id': 3, 'lower_bound': 0, 'upper_bound': 3},
   {'it_id': 4, 'lower_bound': 0, 'upper_bound': 3},
   {'it_id': 5, 'lower_bound': 0, 'upper_bound': 1022},
   {'it_id': 6, 'lower_bound': 0, 'upper_bound': 1022}]}}
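Reading the iterator bounds and the loop nesting above, the program is essentially a triple loop of extents 3 × 1022 × 1022 whose single computation reads a 1024 × 1024 input (input_id 1) and a 3 × 3 × 3 input (input_id 2) through 18 affine accesses. The skeleton below is only an interpretation of that structure, with the body left abstract:

In [ ]:
# Skeleton of the loop nest encoded by dict_repr (bounds from 'iterators_array';
# the exact body is given by the 'rhs_accesses' matrices and is not reproduced here).
for it0 in range(0, 3):              # iterator 0, outermost loop (loop_id 0)
    for it5 in range(0, 1022):       # iterator 5 (loop_id 1)
        for it6 in range(0, 1022):   # iterator 6 (loop_id 2)
            pass                     # computation 0: 18 reads of inputs 1 and 2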

In [ ]:
conv_dataloader = dl

In [20]:
df = get_results_df(conv_dataloader, l.model)

In [24]:
df[['index', 'prediction']].sort_values(by='prediction', ascending=False)


Out[24]:
index prediction target
58 58 0.580466 1.871575
54 54 0.552432 1.885073
77 77 0.523198 0.331829
44 44 0.514327 0.266633
31 31 0.470662 0.317572
14 14 0.459485 0.265213
3 3 0.457847 0.343183
88 88 0.456971 0.263550
28 28 0.454145 0.253250
33 33 0.435479 0.711308
66 66 0.429161 0.440137
27 27 0.428684 0.346705
57 57 0.426915 1.553498
36 36 0.415167 0.336343
32 32 0.411845 0.269034
0 0 0.407359 1.000000
55 55 0.407105 0.368032
45 45 0.406414 0.308645
37 37 0.403094 0.266908
30 30 0.402889 0.432118
40 40 0.399338 0.341547
110 110 0.393791 0.457553
41 41 0.387817 0.249252
2 2 0.386533 0.409968
29 29 0.385452 0.359115
99 99 0.380164 0.378962
26 26 0.376292 0.461779
1 1 0.376145 0.357928
56 56 0.369614 1.797229
35 35 0.368872 0.451103
... ... ... ...
67 67 0.126719 0.737426
69 69 0.125647 0.717404
72 72 0.123660 0.731132
73 73 0.123094 0.701481
76 76 0.122970 0.699363
65 65 0.119446 0.763868
78 78 0.117134 0.634996
81 81 0.117131 0.633130
75 75 0.115514 0.694224
82 82 0.113485 0.596804
85 85 0.112707 0.596734
71 71 0.112498 0.695500
74 74 0.108428 0.699361
86 86 0.108027 0.376278
80 80 0.107547 0.636520
90 90 0.105361 0.403301
95 95 0.105032 0.319196
91 91 0.104608 0.335455
98 98 0.103850 0.322274
94 94 0.103147 0.337526
70 70 0.102378 0.715115
84 84 0.100961 0.589106
79 79 0.097342 0.638447
89 89 0.091621 0.380005
93 93 0.087628 0.335197
83 83 0.085667 0.600259
97 97 0.084371 0.321473
87 87 0.083552 0.382131
92 92 0.078015 0.340908
96 96 0.075144 0.322425

121 rows × 3 columns


In [27]:
ds.schedules[54].schedule_list


Out[27]:
[{'type': 'interchange', 'params': [0, 5], 'factors': None},
 {'type': 'tiling', 'params': [-1, -1, -1], 'factors': [-1, -1, -1]},
 {'type': 'unrolling', 'params': None, 'factors': [1]}]

Real speedup of schedule 58: s = 1.87


Real speedup of schedule 54: s = 1.89