Human numbers


In [ ]:
from fastai.text import *

In [ ]:
bs=64

Data


In [ ]:
path = untar_data(URLs.HUMAN_NUMBERS)
path.ls()


Out[ ]:
[PosixPath('/home/ubuntu/.fastai/data/human_numbers/train.txt'),
 PosixPath('/home/ubuntu/.fastai/data/human_numbers/valid.txt')]

In [ ]:
# read a file and join its lines into one long comma-separated string, returned
# as a single-item list so the whole file becomes one "document"
def readnums(d): return [', '.join(o.strip() for o in open(path/d).readlines())]

In [ ]:
train_txt = readnums('train.txt'); train_txt[0][:80]


Out[ ]:
'one, two, three, four, five, six, seven, eight, nine, ten, eleven, twelve, thirt'

In [ ]:
valid_txt = readnums('valid.txt'); valid_txt[0][-80:]


Out[ ]:
' nine thousand nine hundred ninety eight, nine thousand nine hundred ninety nine'

In [ ]:
train = TextList(train_txt, path=path)
valid = TextList(valid_txt, path=path)

src = ItemLists(path=path, train=train, valid=valid).label_for_lm()
data = src.databunch(bs=bs)

In [ ]:
train[0].text[:80]


Out[ ]:
'xxbos one , two , three , four , five , six , seven , eight , nine , ten , eleve'
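
Note what the preprocessing did: the tokenizer split the commas into separate tokens and prepended the special xxbos token marking the beginning of the stream.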

In [ ]:
len(data.valid_ds[0][0].data)


Out[ ]:
13017

In [ ]:
data.bptt, len(data.valid_dl)


Out[ ]:
(70, 3)

In [ ]:
13017/70/bs


Out[ ]:
2.905580357142857
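
The loader reads the stream bptt=70 tokens at a time, which is why the validation loader holds just under three full batches. A quick check of the arithmetic (a sketch, not fastai internals):

In [ ]:
import math
# 13017 tokens laid out as bs=64 parallel rows, each batch advancing every
# row by bptt=70 tokens
math.ceil(13017 / bs / 70)   # -> 3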

In [ ]:
it = iter(data.valid_dl)
x1,y1 = next(it)
x2,y2 = next(it)
x3,y3 = next(it)
it.close()

In [ ]:
x1.numel()+x2.numel()+x3.numel()


Out[ ]:
13440
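
Three batches of bs=64 rows times bptt=70 tokens hold 3*64*70 = 13440 positions, a little more than the 13017 tokens in the validation text: the loader fills out the final batch (by padding or wrapping, depending on the fastai version) so every row spans all three batches.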

In [ ]:
x1.shape,y1.shape


Out[ ]:
(torch.Size([64, 70]), torch.Size([64, 70]))

In [ ]:
x2.shape,y2.shape


Out[ ]:
(torch.Size([64, 70]), torch.Size([64, 70]))

In [ ]:
x1[:,0]


Out[ ]:
tensor([ 2,  8, 10, 11, 12, 10,  9,  8,  9, 13, 18, 24, 18, 14, 15, 10, 18,  8,
         9,  8, 18, 24, 18, 10, 18, 10,  9,  8, 18, 19, 10, 25, 19, 22, 19, 19,
        23, 19, 10, 13, 10, 10,  8, 13,  8, 19,  9, 19, 34, 16, 10,  9,  8, 16,
         8, 19,  9, 19, 10, 19, 10, 19, 19, 19], device='cuda:0')

In [ ]:
y1[:,0]


Out[ ]:
tensor([18, 18, 26,  9,  8, 11, 31, 18, 25,  9, 10, 14, 10,  9,  8, 14, 10, 18,
        25, 18, 10, 17, 10, 17,  8, 17, 20, 18,  9,  9, 19,  8, 10, 15, 10, 10,
        12, 10, 12,  8, 12, 13, 19,  9, 19, 10, 23, 10,  8,  8, 15, 16, 19,  9,
        19, 10, 23, 10, 18,  8, 18, 10, 10,  9], device='cuda:0')

In [ ]:
v = data.valid_ds.vocab

In [ ]:
v.textify(x1[0])


Out[ ]:
'xxbos eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight'

In [ ]:
v.textify(y1[0])


Out[ ]:
'eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand'
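
So y is simply x shifted one token to the right: at every position the target is the next word of the stream, which is the language-model objective.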

In [ ]:
v.textify(x2[0])


Out[ ]:
'thousand eighteen , eight thousand nineteen , eight thousand twenty , eight thousand twenty one , eight thousand twenty two , eight thousand twenty three , eight thousand twenty four , eight thousand twenty five , eight thousand twenty six , eight thousand twenty seven , eight thousand twenty eight , eight thousand twenty nine , eight thousand thirty , eight thousand thirty one , eight thousand thirty two ,'

In [ ]:
v.textify(x3[0])


Out[ ]:
'eight thousand thirty three , eight thousand thirty four , eight thousand thirty five , eight thousand thirty six , eight thousand thirty seven , eight thousand thirty eight , eight thousand thirty nine , eight thousand forty , eight thousand forty one , eight thousand forty two , eight thousand forty three , eight thousand forty four , eight thousand forty five , eight thousand forty six , eight'

In [ ]:
v.textify(x1[1])


Out[ ]:
', eight thousand forty six , eight thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand fifty nine ,'

In [ ]:
v.textify(x2[1])


Out[ ]:
'eight thousand sixty , eight thousand sixty one , eight thousand sixty two , eight thousand sixty three , eight thousand sixty four , eight thousand sixty five , eight thousand sixty six , eight thousand sixty seven , eight thousand sixty eight , eight thousand sixty nine , eight thousand seventy , eight thousand seventy one , eight thousand seventy two , eight thousand seventy three , eight thousand'

In [ ]:
v.textify(x3[1])


Out[ ]:
'seventy four , eight thousand seventy five , eight thousand seventy six , eight thousand seventy seven , eight thousand seventy eight , eight thousand seventy nine , eight thousand eighty , eight thousand eighty one , eight thousand eighty two , eight thousand eighty three , eight thousand eighty four , eight thousand eighty five , eight thousand eighty six , eight thousand eighty seven , eight thousand eighty'

In [ ]:
v.textify(x3[-1])


Out[ ]:
'ninety , nine thousand nine hundred ninety one , nine thousand nine hundred ninety two , nine thousand nine hundred ninety three , nine thousand nine hundred ninety four , nine thousand nine hundred ninety five , nine thousand nine hundred ninety six , nine thousand nine hundred ninety seven , nine thousand nine hundred ninety eight , nine thousand nine hundred ninety nine xxbos eight thousand one , eight'
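
Each row of the batch is a contiguous slice of the text: x2[0] picks up exactly where x1[0] stopped, x2[1] follows x1[1], and the final row reaches the end of the validation set before wrapping back around to xxbos.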

In [ ]:
data.show_batch(ds_type=DatasetType.Valid)


idx text
0 thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand fifty nine , eight thousand sixty , eight thousand sixty
1 eight , eight thousand eighty nine , eight thousand ninety , eight thousand ninety one , eight thousand ninety two , eight thousand ninety three , eight thousand ninety four , eight thousand ninety five , eight thousand ninety six , eight thousand ninety seven , eight thousand ninety eight , eight thousand ninety nine , eight thousand one hundred , eight thousand one hundred one , eight thousand one
2 thousand one hundred twenty four , eight thousand one hundred twenty five , eight thousand one hundred twenty six , eight thousand one hundred twenty seven , eight thousand one hundred twenty eight , eight thousand one hundred twenty nine , eight thousand one hundred thirty , eight thousand one hundred thirty one , eight thousand one hundred thirty two , eight thousand one hundred thirty three , eight thousand
3 three , eight thousand one hundred fifty four , eight thousand one hundred fifty five , eight thousand one hundred fifty six , eight thousand one hundred fifty seven , eight thousand one hundred fifty eight , eight thousand one hundred fifty nine , eight thousand one hundred sixty , eight thousand one hundred sixty one , eight thousand one hundred sixty two , eight thousand one hundred sixty three
4 thousand one hundred eighty three , eight thousand one hundred eighty four , eight thousand one hundred eighty five , eight thousand one hundred eighty six , eight thousand one hundred eighty seven , eight thousand one hundred eighty eight , eight thousand one hundred eighty nine , eight thousand one hundred ninety , eight thousand one hundred ninety one , eight thousand one hundred ninety two , eight thousand

Single fully connected model


In [ ]:
data = src.databunch(bs=bs, bptt=3)

In [ ]:
x,y = data.one_batch()
x.shape,y.shape


Out[ ]:
(torch.Size([64, 3]), torch.Size([64, 3]))

In [ ]:
nv = len(v.itos); nv


Out[ ]:
38

In [ ]:
nh=64

In [ ]:
# the single-output models below emit one prediction per sequence, so score it
# against only the last token of each target row
def loss4(input,target): return F.cross_entropy(input, target[:,-1])
def acc4 (input,target): return accuracy(input, target[:,-1])

In [ ]:
class Model0(nn.Module):
    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)  # green arrow
        self.h_h = nn.Linear(nh,nh)     # brown arrow
        self.h_o = nn.Linear(nh,nv)     # blue arrow
        self.bn = nn.BatchNorm1d(nh)
        
    def forward(self, x):
        h = self.bn(F.relu(self.h_h(self.i_h(x[:,0]))))
        if x.shape[1]>1:
            h = h + self.i_h(x[:,1])
            h = self.bn(F.relu(self.h_h(h)))
        if x.shape[1]>2:
            h = h + self.i_h(x[:,2])
            h = self.bn(F.relu(self.h_h(h)))
        return self.h_o(h)
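
A quick sanity check (a sketch, reusing the x grabbed above): Model0 maps a (bs, 3) batch of token ids to a single (bs, nv) prediction for the token that follows the three inputs, which is why loss4 and acc4 compare it against target[:,-1] alone.

In [ ]:
Model0()(x).shape   # expected: torch.Size([64, 38]), one prediction per row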

In [ ]:
learn = Learner(data, Model0(), loss_func=loss4, metrics=acc4)

In [ ]:
learn.fit_one_cycle(6, 1e-4)


Total time: 00:07

epoch train_loss valid_loss acc4
1 3.596286 3.588869 0.046645
2 3.086100 3.205763 0.274816
3 2.494411 2.749365 0.392004
4 2.144753 2.463537 0.415671
5 2.010915 2.352887 0.409237
6 1.983992 2.336967 0.408778

Same thing with a loop


In [ ]:
class Model1(nn.Module):
    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)  # green arrow
        self.h_h = nn.Linear(nh,nh)     # brown arrow
        self.h_o = nn.Linear(nh,nv)     # blue arrow
        self.bn = nn.BatchNorm1d(nh)
        
    def forward(self, x):
        h = torch.zeros(x.shape[0], nh).to(device=x.device)
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = self.bn(F.relu(self.h_h(h)))
        return self.h_o(h)
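
The loop implements the recurrence h_t = bn(relu(W_hh(h_{t-1} + E[x_t]))). A minimal stand-alone sketch of that recurrence with made-up weights (hypothetical tensors; batchnorm and biases dropped for clarity):

In [ ]:
E = torch.randn(nv, nh)   # stand-in for the embedding matrix i_h.weight
W = torch.randn(nh, nh)   # stand-in for h_h.weight
h = torch.zeros(nh)
for t in (5, 9, 13):      # any sequence of token ids, of any length
    h = torch.relu(W @ (h + E[t]))
h.shape                   # torch.Size([64]): still a single hidden state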

In [ ]:
learn = Learner(data, Model1(), loss_func=loss4, metrics=acc4)

In [ ]:
learn.fit_one_cycle(6, 1e-4)


Total time: 00:07

epoch train_loss valid_loss acc4
1 3.493525 3.420231 0.156250
2 2.987600 2.937893 0.376149
3 2.440199 2.477995 0.388787
4 2.132837 2.256569 0.391774
5 2.011305 2.181337 0.392923
6 1.985913 2.170874 0.393153

Multi fully connected model


In [ ]:
data = src.databunch(bs=bs, bptt=20)

In [ ]:
x,y = data.one_batch()
x.shape,y.shape


Out[ ]:
(torch.Size([64, 20]), torch.Size([64, 20]))

In [ ]:
class Model2(nn.Module):
    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)
        self.h_h = nn.Linear(nh,nh)
        self.h_o = nn.Linear(nh,nv)
        self.bn = nn.BatchNorm1d(nh)
        
    def forward(self, x):
        h = torch.zeros(x.shape[0], nh).to(device=x.device)
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = F.relu(self.h_h(h))
            res.append(self.h_o(self.bn(h)))  # predict after every token, not just the last
        return torch.stack(res, dim=1)        # (bs, seq_len, nv)
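
No custom loss function this time: Learner falls back to the loss attached to the databunch, which for language-model data should be fastai's flattened cross entropy (an assumption about the v1 defaults). In effect it does something like:

In [ ]:
out  = torch.randn(bs, 20, nv)           # per-position predictions
targ = torch.randint(0, nv, (bs, 20))    # shifted target ids
F.cross_entropy(out.view(-1, nv), targ.view(-1))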

In [ ]:
learn = Learner(data, Model2(), metrics=accuracy)

In [ ]:
learn.fit_one_cycle(10, 1e-4, pct_start=0.1)


Total time: 00:06

epoch train_loss valid_loss accuracy
1 3.639285 3.709278 0.058949
2 3.551151 3.565677 0.151776
3 3.439908 3.431850 0.207741
4 3.323083 3.314237 0.283949
5 3.213422 3.219906 0.321662
6 3.119673 3.151162 0.336790
7 3.046645 3.106630 0.341690
8 2.995379 3.082552 0.346662
9 2.963800 3.073327 0.349645
10 2.947312 3.071951 0.349787

Maintain state


In [ ]:
class Model3(nn.Module):
    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)
        self.h_h = nn.Linear(nh,nh)
        self.h_o = nn.Linear(nh,nv)
        self.bn = nn.BatchNorm1d(nh)
        self.h = torch.zeros(bs, nh).cuda()  # hidden state kept between batches
        
    def forward(self, x):
        res = []
        h = self.h
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = F.relu(self.h_h(h))
            res.append(self.bn(h))
        self.h = h.detach()  # keep the values but drop the gradient history
        res = torch.stack(res, dim=1)
        res = self.h_o(res)
        return res
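
Carrying self.h between batches is what preserves state, while .detach() cuts the autograd graph at the batch boundary so backprop only runs through the current bptt tokens (truncated backpropagation through time). A two-line illustration:

In [ ]:
a = torch.zeros(3, requires_grad=True)
(a + 1).detach().requires_grad   # False: gradients will not flow past this point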

In [ ]:
learn = Learner(data, Model3(), metrics=accuracy)

In [ ]:
learn.fit_one_cycle(20, 3e-3)


Total time: 00:11

epoch train_loss valid_loss accuracy
1 3.598183 3.556362 0.050710
2 3.274616 2.975699 0.401634
3 2.624206 2.036894 0.467330
4 2.022702 1.956439 0.316193
5 1.681813 1.934952 0.336861
6 1.453007 1.948201 0.351349
7 1.276971 2.005776 0.368679
8 1.138499 2.081261 0.360156
9 1.029217 2.145853 0.360795
10 0.939949 2.215388 0.372230
11 0.865441 2.240438 0.401491
12 0.805310 2.195846 0.409375
13 0.755035 2.324373 0.422727
14 0.713073 2.305542 0.449716
15 0.677393 2.350155 0.446449
16 0.645841 2.418738 0.446591
17 0.621809 2.456903 0.446165
18 0.605300 2.541699 0.443040
19 0.594099 2.539824 0.443040
20 0.587563 2.551423 0.442827

nn.RNN


In [ ]:
class Model4(nn.Module):
    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)
        self.rnn = nn.RNN(nh,nh, batch_first=True)
        self.h_o = nn.Linear(nh,nv)
        self.bn = BatchNorm1dFlat(nh)
        self.h = torch.zeros(1, bs, nh).cuda()
        
    def forward(self, x):
        res,h = self.rnn(self.i_h(x), self.h)
        self.h = h.detach()
        return self.h_o(self.bn(res))
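
nn.RNN runs the same loop as Model3, just in optimized code, returning every intermediate hidden state plus the final one; BatchNorm1dFlat is fastai's batchnorm that flattens the sequence dimension so BatchNorm1d can handle the (bs, seq, nh) activations. A shape sketch with stand-in tensors:

In [ ]:
rnn = nn.RNN(nh, nh, batch_first=True)   # batch_first: input is (batch, seq, features)
res, h = rnn(torch.randn(bs, 20, nh), torch.zeros(1, bs, nh))
res.shape, h.shape                       # (64, 20, 64) and (1, 64, 64)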

In [ ]:
learn = Learner(data, Model4(), metrics=accuracy)

In [ ]:
learn.fit_one_cycle(20, 3e-3)


Total time: 00:04

epoch train_loss valid_loss accuracy
1 3.451432 3.268344 0.224148
2 2.974938 2.456569 0.466051
3 2.316732 1.946969 0.465625
4 1.866151 1.991952 0.314702
5 1.618516 1.802403 0.437216
6 1.411517 1.731107 0.436293
7 1.171916 1.655979 0.504048
8 0.965887 1.579963 0.522088
9 0.797046 1.479819 0.565057
10 0.659378 1.487831 0.579048
11 0.553282 1.441922 0.597798
12 0.475167 1.498148 0.600781
13 0.416131 1.546984 0.606463
14 0.372395 1.594261 0.607386
15 0.337093 1.578321 0.613352
16 0.311385 1.580973 0.623366
17 0.292869 1.625745 0.618253
18 0.279486 1.623960 0.626065
19 0.270054 1.682090 0.611719
20 0.263857 1.675676 0.614702

2-layer GRU


In [ ]:
class Model5(nn.Module):
    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)
        self.rnn = nn.GRU(nh, nh, 2, batch_first=True)
        self.h_o = nn.Linear(nh,nv)
        self.bn = BatchNorm1dFlat(nh)
        self.h = torch.zeros(2, bs, nh).cuda()
        
    def forward(self, x):
        res,h = self.rnn(self.i_h(x), self.h)
        self.h = h.detach()
        return self.h_o(self.bn(res))
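
A GRU adds learned gates that decide, at each step, how much of the previous hidden state to keep and how much of the new input to let in, which helps information survive across longer stretches of the sequence. With two stacked layers the carried state needs one slice per layer, while the output sequence still comes from the top layer only:

In [ ]:
gru = nn.GRU(nh, nh, 2, batch_first=True)
res, h = gru(torch.randn(bs, 20, nh), torch.zeros(2, bs, nh))
res.shape, h.shape                       # (64, 20, 64) and (2, 64, 64)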

In [ ]:
learn = Learner(data, Model5(), metrics=accuracy)

In [ ]:
learn.fit_one_cycle(10, 1e-2)


Total time: 00:02

epoch train_loss valid_loss accuracy
1 2.864854 2.314943 0.454545
2 1.798988 1.357116 0.629688
3 0.932729 1.307463 0.796733
4 0.451969 1.329699 0.788636
5 0.225787 1.293570 0.800142
6 0.118085 1.265926 0.803338
7 0.065306 1.207096 0.806960
8 0.038098 1.205361 0.813920
9 0.024069 1.239411 0.807813
10 0.017078 1.253409 0.807102

fin


In [ ]: