iteration 1 / 5000: loss 2.409961
iteration 101 / 5000: loss 2.158140
iteration 201 / 5000: loss 2.009294
iteration 301 / 5000: loss 1.979639
iteration 401 / 5000: loss 1.942002
iteration 501 / 5000: loss 1.827258
iteration 601 / 5000: loss 1.835251
iteration 701 / 5000: loss 1.704696
iteration 801 / 5000: loss 1.769029
iteration 901 / 5000: loss 1.675095
iteration 1001 / 5000: loss 1.689062
iteration 1101 / 5000: loss 1.786561
iteration 1201 / 5000: loss 1.822278
iteration 1301 / 5000: loss 1.721067
iteration 1401 / 5000: loss 1.730675
iteration 1501 / 5000: loss 1.686564
iteration 1601 / 5000: loss 1.544546
iteration 1701 / 5000: loss 1.619521
iteration 1801 / 5000: loss 1.678234
iteration 1901 / 5000: loss 1.666344
iteration 2001 / 5000: loss 1.637763
iteration 2101 / 5000: loss 1.687455
iteration 2201 / 5000: loss 1.479651
iteration 2301 / 5000: loss 1.572565
iteration 2401 / 5000: loss 1.578272
iteration 2501 / 5000: loss 1.612838
iteration 2601 / 5000: loss 1.767758
iteration 2701 / 5000: loss 1.624772
iteration 2801 / 5000: loss 1.643853
iteration 2901 / 5000: loss 1.544022
iteration 3001 / 5000: loss 1.699063
iteration 3101 / 5000: loss 1.603541
iteration 3201 / 5000: loss 1.684917
iteration 3301 / 5000: loss 1.572763
iteration 3401 / 5000: loss 1.582653
iteration 3501 / 5000: loss 1.643857
iteration 3601 / 5000: loss 1.476511
iteration 3701 / 5000: loss 1.603652
iteration 3801 / 5000: loss 1.518479
iteration 3901 / 5000: loss 1.536813
iteration 4001 / 5000: loss 1.643620
iteration 4101 / 5000: loss 1.576849
iteration 4201 / 5000: loss 1.540096
iteration 4301 / 5000: loss 1.607825
iteration 4401 / 5000: loss 1.680330
iteration 4501 / 5000: loss 1.610213
iteration 4601 / 5000: loss 1.581618
iteration 4701 / 5000: loss 1.662318
iteration 4801 / 5000: loss 1.509553
iteration 4901 / 5000: loss 1.609396
hidden_layer_size: 50, lr: 3.000000e-04, reg: 7.000000e-01, train_acc: 0.500429, val_acc: 0.485000
iteration 1 / 5000: loss 2.425271
iteration 101 / 5000: loss 2.220153
iteration 201 / 5000: loss 2.081063
iteration 301 / 5000: loss 2.018277
iteration 401 / 5000: loss 2.017465
iteration 501 / 5000: loss 1.872938
iteration 601 / 5000: loss 1.929252
iteration 701 / 5000: loss 1.849218
iteration 801 / 5000: loss 1.740892
iteration 901 / 5000: loss 1.737878
iteration 1001 / 5000: loss 1.716343
iteration 1101 / 5000: loss 1.695569
iteration 1201 / 5000: loss 1.672084
iteration 1301 / 5000: loss 1.605759
iteration 1401 / 5000: loss 1.706853
iteration 1501 / 5000: loss 1.750231
iteration 1601 / 5000: loss 1.675886
iteration 1701 / 5000: loss 1.770472
iteration 1801 / 5000: loss 1.569446
iteration 1901 / 5000: loss 1.675889
iteration 2001 / 5000: loss 1.638305
iteration 2101 / 5000: loss 1.671673
iteration 2201 / 5000: loss 1.654543
iteration 2301 / 5000: loss 1.702525
iteration 2401 / 5000: loss 1.690901
iteration 2501 / 5000: loss 1.532892
iteration 2601 / 5000: loss 1.648299
iteration 2701 / 5000: loss 1.546824
iteration 2801 / 5000: loss 1.630912
iteration 2901 / 5000: loss 1.652404
iteration 3001 / 5000: loss 1.697700
iteration 3101 / 5000: loss 1.789114
iteration 3201 / 5000: loss 1.567175
iteration 3301 / 5000: loss 1.560006
iteration 3401 / 5000: loss 1.616820
iteration 3501 / 5000: loss 1.643663
iteration 3601 / 5000: loss 1.477822
iteration 3701 / 5000: loss 1.558004
iteration 3801 / 5000: loss 1.466781
iteration 3901 / 5000: loss 1.585170
iteration 4001 / 5000: loss 1.608458
iteration 4101 / 5000: loss 1.599268
iteration 4201 / 5000: loss 1.595368
iteration 4301 / 5000: loss 1.653542
iteration 4401 / 5000: loss 1.599541
iteration 4501 / 5000: loss 1.607244
iteration 4601 / 5000: loss 1.539681
iteration 4701 / 5000: loss 1.644408
iteration 4801 / 5000: loss 1.582699
iteration 4901 / 5000: loss 1.567336
hidden_layer_size: 50, lr: 3.000000e-04, reg: 8.000000e-01, train_acc: 0.500510, val_acc: 0.482000
iteration 1 / 5000: loss 2.442235
iteration 101 / 5000: loss 2.223433
iteration 201 / 5000: loss 2.065532
iteration 301 / 5000: loss 1.950631
iteration 401 / 5000: loss 1.967382
iteration 501 / 5000: loss 1.950207
iteration 601 / 5000: loss 1.775823
iteration 701 / 5000: loss 1.811384
iteration 801 / 5000: loss 1.819327
iteration 901 / 5000: loss 1.787936
iteration 1001 / 5000: loss 1.745111
iteration 1101 / 5000: loss 1.883332
iteration 1201 / 5000: loss 1.634129
iteration 1301 / 5000: loss 1.784145
iteration 1401 / 5000: loss 1.632121
iteration 1501 / 5000: loss 1.620308
iteration 1601 / 5000: loss 1.775527
iteration 1701 / 5000: loss 1.613225
iteration 1801 / 5000: loss 1.639705
iteration 1901 / 5000: loss 1.618770
iteration 2001 / 5000: loss 1.705976
iteration 2101 / 5000: loss 1.555351
iteration 2201 / 5000: loss 1.697385
iteration 2301 / 5000: loss 1.576147
iteration 2401 / 5000: loss 1.687958
iteration 2501 / 5000: loss 1.703212
iteration 2601 / 5000: loss 1.544348
iteration 2701 / 5000: loss 1.566488
iteration 2801 / 5000: loss 1.756422
iteration 2901 / 5000: loss 1.624771
iteration 3001 / 5000: loss 1.647981
iteration 3101 / 5000: loss 1.624651
iteration 3201 / 5000: loss 1.580691
iteration 3301 / 5000: loss 1.627093
iteration 3401 / 5000: loss 1.659882
iteration 3501 / 5000: loss 1.602352
iteration 3601 / 5000: loss 1.719313
iteration 3701 / 5000: loss 1.575136
iteration 3801 / 5000: loss 1.690630
iteration 3901 / 5000: loss 1.640411
iteration 4001 / 5000: loss 1.526751
iteration 4101 / 5000: loss 1.596954
iteration 4201 / 5000: loss 1.591691
iteration 4301 / 5000: loss 1.680037
iteration 4401 / 5000: loss 1.493575
iteration 4501 / 5000: loss 1.588050
iteration 4601 / 5000: loss 1.638900
iteration 4701 / 5000: loss 1.598790
iteration 4801 / 5000: loss 1.601114
iteration 4901 / 5000: loss 1.551229
hidden_layer_size: 50, lr: 3.000000e-04, reg: 9.000000e-01, train_acc: 0.494612, val_acc: 0.486000
iteration 1 / 5000: loss 2.449321
iteration 101 / 5000: loss 2.215104
iteration 201 / 5000: loss 2.079319
iteration 301 / 5000: loss 1.974501
iteration 401 / 5000: loss 1.853118
iteration 501 / 5000: loss 1.874310
iteration 601 / 5000: loss 1.799037
iteration 701 / 5000: loss 1.848519
iteration 801 / 5000: loss 1.956783
iteration 901 / 5000: loss 1.733930
iteration 1001 / 5000: loss 1.770525
iteration 1101 / 5000: loss 1.722408
iteration 1201 / 5000: loss 1.725642
iteration 1301 / 5000: loss 1.690206
iteration 1401 / 5000: loss 1.756073
iteration 1501 / 5000: loss 1.789380
iteration 1601 / 5000: loss 1.661055
iteration 1701 / 5000: loss 1.624347
iteration 1801 / 5000: loss 1.680973
iteration 1901 / 5000: loss 1.764589
iteration 2001 / 5000: loss 1.673944
iteration 2101 / 5000: loss 1.685413
iteration 2201 / 5000: loss 1.623023
iteration 2301 / 5000: loss 1.768487
iteration 2401 / 5000: loss 1.712108
iteration 2501 / 5000: loss 1.585581
iteration 2601 / 5000: loss 1.695190
iteration 2701 / 5000: loss 1.647915
iteration 2801 / 5000: loss 1.768477
iteration 2901 / 5000: loss 1.615983
iteration 3001 / 5000: loss 1.590348
iteration 3101 / 5000: loss 1.737815
iteration 3201 / 5000: loss 1.647073
iteration 3301 / 5000: loss 1.657719
iteration 3401 / 5000: loss 1.529472
iteration 3501 / 5000: loss 1.615249
iteration 3601 / 5000: loss 1.716291
iteration 3701 / 5000: loss 1.642936
iteration 3801 / 5000: loss 1.667776
iteration 3901 / 5000: loss 1.521122
iteration 4001 / 5000: loss 1.672603
iteration 4101 / 5000: loss 1.609479
iteration 4201 / 5000: loss 1.666670
iteration 4301 / 5000: loss 1.571400
iteration 4401 / 5000: loss 1.579785
iteration 4501 / 5000: loss 1.515378
iteration 4601 / 5000: loss 1.615027
iteration 4701 / 5000: loss 1.557653
iteration 4801 / 5000: loss 1.600093
iteration 4901 / 5000: loss 1.632438
hidden_layer_size: 50, lr: 3.000000e-04, reg: 1.000000e+00, train_acc: 0.494184, val_acc: 0.487000
iteration 1 / 5000: loss 2.410356
iteration 101 / 5000: loss 1.912126
iteration 201 / 5000: loss 1.902933
iteration 301 / 5000: loss 1.852738
iteration 401 / 5000: loss 1.754910
iteration 501 / 5000: loss 1.609960
iteration 601 / 5000: loss 1.635343
iteration 701 / 5000: loss 1.696662
iteration 801 / 5000: loss 1.704251
iteration 901 / 5000: loss 1.737743
iteration 1001 / 5000: loss 1.612900
iteration 1101 / 5000: loss 1.595094
iteration 1201 / 5000: loss 1.616778
iteration 1301 / 5000: loss 1.578644
iteration 1401 / 5000: loss 1.614397
iteration 1501 / 5000: loss 1.473924
iteration 1601 / 5000: loss 1.535307
iteration 1701 / 5000: loss 1.582054
iteration 1801 / 5000: loss 1.618429
iteration 1901 / 5000: loss 1.609618
iteration 2001 / 5000: loss 1.555397
iteration 2101 / 5000: loss 1.608955
iteration 2201 / 5000: loss 1.678534
iteration 2301 / 5000: loss 1.545709
iteration 2401 / 5000: loss 1.505825
iteration 2501 / 5000: loss 1.505607
iteration 2601 / 5000: loss 1.578869
iteration 2701 / 5000: loss 1.458720
iteration 2801 / 5000: loss 1.557589
iteration 2901 / 5000: loss 1.502304
iteration 3001 / 5000: loss 1.656756
iteration 3101 / 5000: loss 1.495635
iteration 3201 / 5000: loss 1.645036
iteration 3301 / 5000: loss 1.432810
iteration 3401 / 5000: loss 1.487877
iteration 3501 / 5000: loss 1.545028
iteration 3601 / 5000: loss 1.632838
iteration 3701 / 5000: loss 1.575399
iteration 3801 / 5000: loss 1.698916
iteration 3901 / 5000: loss 1.474102
iteration 4001 / 5000: loss 1.461715
iteration 4101 / 5000: loss 1.566870
iteration 4201 / 5000: loss 1.454293
iteration 4301 / 5000: loss 1.568163
iteration 4401 / 5000: loss 1.572119
iteration 4501 / 5000: loss 1.446309
iteration 4601 / 5000: loss 1.510652
iteration 4701 / 5000: loss 1.459831
iteration 4801 / 5000: loss 1.495238
iteration 4901 / 5000: loss 1.502663
hidden_layer_size: 50, lr: 9.000000e-04, reg: 7.000000e-01, train_acc: 0.533082, val_acc: 0.495000
iteration 1 / 5000: loss 2.427359
iteration 101 / 5000: loss 2.002654
iteration 201 / 5000: loss 1.759198
iteration 301 / 5000: loss 1.711776
iteration 401 / 5000: loss 1.843849
iteration 501 / 5000: loss 1.729990
iteration 601 / 5000: loss 1.746596
iteration 701 / 5000: loss 1.643138
iteration 801 / 5000: loss 1.671120
iteration 901 / 5000: loss 1.708569
iteration 1001 / 5000: loss 1.626997
iteration 1101 / 5000: loss 1.703050
iteration 1201 / 5000: loss 1.610826
iteration 1301 / 5000: loss 1.621018
iteration 1401 / 5000: loss 1.545155
iteration 1501 / 5000: loss 1.830826
iteration 1601 / 5000: loss 1.542515
iteration 1701 / 5000: loss 1.629082
iteration 1801 / 5000: loss 1.520701
iteration 1901 / 5000: loss 1.639636
iteration 2001 / 5000: loss 1.542157
iteration 2101 / 5000: loss 1.621997
iteration 2201 / 5000: loss 1.606738
iteration 2301 / 5000: loss 1.569742
iteration 2401 / 5000: loss 1.580212
iteration 2501 / 5000: loss 1.442048
iteration 2601 / 5000: loss 1.664999
iteration 2701 / 5000: loss 1.646400
iteration 2801 / 5000: loss 1.524228
iteration 2901 / 5000: loss 1.654324
iteration 3001 / 5000: loss 1.589383
iteration 3101 / 5000: loss 1.675237
iteration 3201 / 5000: loss 1.589844
iteration 3301 / 5000: loss 1.569137
iteration 3401 / 5000: loss 1.456047
iteration 3501 / 5000: loss 1.595992
iteration 3601 / 5000: loss 1.572865
iteration 3701 / 5000: loss 1.543767
iteration 3801 / 5000: loss 1.577963
iteration 3901 / 5000: loss 1.614468
iteration 4001 / 5000: loss 1.486841
iteration 4101 / 5000: loss 1.659688
iteration 4201 / 5000: loss 1.482823
iteration 4301 / 5000: loss 1.567118
iteration 4401 / 5000: loss 1.627538
iteration 4501 / 5000: loss 1.524613
iteration 4601 / 5000: loss 1.511288
iteration 4701 / 5000: loss 1.576008
iteration 4801 / 5000: loss 1.585694
iteration 4901 / 5000: loss 1.530811
hidden_layer_size: 50, lr: 9.000000e-04, reg: 8.000000e-01, train_acc: 0.529490, val_acc: 0.499000
iteration 1 / 5000: loss 2.442946
iteration 101 / 5000: loss 2.006257
iteration 201 / 5000: loss 1.779370
iteration 301 / 5000: loss 1.765025
iteration 401 / 5000: loss 1.731169
iteration 501 / 5000: loss 1.761445
iteration 601 / 5000: loss 1.510700
iteration 701 / 5000: loss 1.616954
iteration 801 / 5000: loss 1.586672
iteration 901 / 5000: loss 1.653583
iteration 1001 / 5000: loss 1.803018
iteration 1101 / 5000: loss 1.706154
iteration 1201 / 5000: loss 1.696805
iteration 1301 / 5000: loss 1.683088
iteration 1401 / 5000: loss 1.600967
iteration 1501 / 5000: loss 1.638499
iteration 1601 / 5000: loss 1.578823
iteration 1701 / 5000: loss 1.582748
iteration 1801 / 5000: loss 1.643480
iteration 1901 / 5000: loss 1.614718
iteration 2001 / 5000: loss 1.581021
iteration 2101 / 5000: loss 1.674719
iteration 2201 / 5000: loss 1.651773
iteration 2301 / 5000: loss 1.638304
iteration 2401 / 5000: loss 1.688920
iteration 2501 / 5000: loss 1.632117
iteration 2601 / 5000: loss 1.692494
iteration 2701 / 5000: loss 1.540684
iteration 2801 / 5000: loss 1.597368
iteration 2901 / 5000: loss 1.679163
iteration 3001 / 5000: loss 1.645066
iteration 3101 / 5000: loss 1.567624
iteration 3201 / 5000: loss 1.619583
iteration 3301 / 5000: loss 1.576671
iteration 3401 / 5000: loss 1.620777
iteration 3501 / 5000: loss 1.632127
iteration 3601 / 5000: loss 1.573995
iteration 3701 / 5000: loss 1.616885
iteration 3801 / 5000: loss 1.588414
iteration 3901 / 5000: loss 1.692021
iteration 4001 / 5000: loss 1.594353
iteration 4101 / 5000: loss 1.607120
iteration 4201 / 5000: loss 1.559214
iteration 4301 / 5000: loss 1.683575
iteration 4401 / 5000: loss 1.687762
iteration 4501 / 5000: loss 1.712059
iteration 4601 / 5000: loss 1.561996
iteration 4701 / 5000: loss 1.653669
iteration 4801 / 5000: loss 1.635627
iteration 4901 / 5000: loss 1.570240
hidden_layer_size: 50, lr: 9.000000e-04, reg: 9.000000e-01, train_acc: 0.523714, val_acc: 0.489000
iteration 1 / 5000: loss 2.456239
iteration 101 / 5000: loss 2.075234
iteration 201 / 5000: loss 1.839561
iteration 301 / 5000: loss 1.795984
iteration 401 / 5000: loss 1.743414
iteration 501 / 5000: loss 1.670168
iteration 601 / 5000: loss 1.567970
iteration 701 / 5000: loss 1.678423
iteration 801 / 5000: loss 1.745050
iteration 901 / 5000: loss 1.806679
iteration 1001 / 5000: loss 1.702558
iteration 1101 / 5000: loss 1.659625
iteration 1201 / 5000: loss 1.707160
iteration 1301 / 5000: loss 1.561432
iteration 1401 / 5000: loss 1.653963
iteration 1501 / 5000: loss 1.605561
iteration 1601 / 5000: loss 1.629088
iteration 1701 / 5000: loss 1.598716
iteration 1801 / 5000: loss 1.605527
iteration 1901 / 5000: loss 1.560287
iteration 2001 / 5000: loss 1.590624
iteration 2101 / 5000: loss 1.572512
iteration 2201 / 5000: loss 1.636139
iteration 2301 / 5000: loss 1.669273
iteration 2401 / 5000: loss 1.628409
iteration 2501 / 5000: loss 1.622587
iteration 2601 / 5000: loss 1.624992
iteration 2701 / 5000: loss 1.702408
iteration 2801 / 5000: loss 1.630381
iteration 2901 / 5000: loss 1.533254
iteration 3001 / 5000: loss 1.606394
iteration 3101 / 5000: loss 1.613779
iteration 3201 / 5000: loss 1.763971
iteration 3301 / 5000: loss 1.626917
iteration 3401 / 5000: loss 1.618677
iteration 3501 / 5000: loss 1.489478
iteration 3601 / 5000: loss 1.592777
iteration 3701 / 5000: loss 1.710834
iteration 3801 / 5000: loss 1.655450
iteration 3901 / 5000: loss 1.703332
iteration 4001 / 5000: loss 1.489270
iteration 4101 / 5000: loss 1.683961
iteration 4201 / 5000: loss 1.675000
iteration 4301 / 5000: loss 1.507502
iteration 4401 / 5000: loss 1.634462
iteration 4501 / 5000: loss 1.682081
iteration 4601 / 5000: loss 1.656801
iteration 4701 / 5000: loss 1.622829
iteration 4801 / 5000: loss 1.520234
iteration 4901 / 5000: loss 1.563238
hidden_layer_size: 50, lr: 9.000000e-04, reg: 1.000000e+00, train_acc: 0.514755, val_acc: 0.487000
iteration 1 / 5000: loss 2.410724
iteration 101 / 5000: loss 1.946099
iteration 201 / 5000: loss 1.656245
iteration 301 / 5000: loss 1.713223
iteration 401 / 5000: loss 1.612764
iteration 501 / 5000: loss 1.724715
iteration 601 / 5000: loss 1.764600
iteration 701 / 5000: loss 1.664367
iteration 801 / 5000: loss 1.649581
iteration 901 / 5000: loss 1.554899
iteration 1001 / 5000: loss 1.672089
iteration 1101 / 5000: loss 1.513489
iteration 1201 / 5000: loss 1.646212
iteration 1301 / 5000: loss 1.496682
iteration 1401 / 5000: loss 1.494121
iteration 1501 / 5000: loss 1.542276
iteration 1601 / 5000: loss 1.605371
iteration 1701 / 5000: loss 1.468398
iteration 1801 / 5000: loss 1.430812
iteration 1901 / 5000: loss 1.695256
iteration 2001 / 5000: loss 1.441603
iteration 2101 / 5000: loss 1.562543
iteration 2201 / 5000: loss 1.609819
iteration 2301 / 5000: loss 1.558494
iteration 2401 / 5000: loss 1.498648
iteration 2501 / 5000: loss 1.561292
iteration 2601 / 5000: loss 1.581410
iteration 2701 / 5000: loss 1.615184
iteration 2801 / 5000: loss 1.427294
iteration 2901 / 5000: loss 1.620716
iteration 3001 / 5000: loss 1.601757
iteration 3101 / 5000: loss 1.516932
iteration 3201 / 5000: loss 1.546777
iteration 3301 / 5000: loss 1.480112
iteration 3401 / 5000: loss 1.737246
iteration 3501 / 5000: loss 1.541492
iteration 3601 / 5000: loss 1.491249
iteration 3701 / 5000: loss 1.572659
iteration 3801 / 5000: loss 1.594898
iteration 3901 / 5000: loss 1.474111
iteration 4001 / 5000: loss 1.672391
iteration 4101 / 5000: loss 1.582733
iteration 4201 / 5000: loss 1.568035
iteration 4301 / 5000: loss 1.491607
iteration 4401 / 5000: loss 1.630314
iteration 4501 / 5000: loss 1.486603
iteration 4601 / 5000: loss 1.600044
iteration 4701 / 5000: loss 1.490755
iteration 4801 / 5000: loss 1.681420
iteration 4901 / 5000: loss 1.615046
hidden_layer_size: 50, lr: 1.000000e-03, reg: 7.000000e-01, train_acc: 0.530490, val_acc: 0.506000
iteration 1 / 5000: loss 2.425752
iteration 101 / 5000: loss 1.962657
iteration 201 / 5000: loss 1.863897
iteration 301 / 5000: loss 1.752351
iteration 401 / 5000: loss 1.699809
iteration 501 / 5000: loss 1.675494
iteration 601 / 5000: loss 1.710208
iteration 701 / 5000: loss 1.676544
iteration 801 / 5000: loss 1.691680
iteration 901 / 5000: loss 1.670570
iteration 1001 / 5000: loss 1.625385
iteration 1101 / 5000: loss 1.569421
iteration 1201 / 5000: loss 1.571140
iteration 1301 / 5000: loss 1.661030
iteration 1401 / 5000: loss 1.626568
iteration 1501 / 5000: loss 1.626132
iteration 1601 / 5000: loss 1.615750
iteration 1701 / 5000: loss 1.680352
iteration 1801 / 5000: loss 1.654851
iteration 1901 / 5000: loss 1.610617
iteration 2001 / 5000: loss 1.530851
iteration 2101 / 5000: loss 1.394799
iteration 2201 / 5000: loss 1.521879
iteration 2301 / 5000: loss 1.612318
iteration 2401 / 5000: loss 1.575531
iteration 2501 / 5000: loss 1.626266
iteration 2601 / 5000: loss 1.575153
iteration 2701 / 5000: loss 1.630251
iteration 2801 / 5000: loss 1.587398
iteration 2901 / 5000: loss 1.653894
iteration 3001 / 5000: loss 1.669521
iteration 3101 / 5000: loss 1.684621
iteration 3201 / 5000: loss 1.497962
iteration 3301 / 5000: loss 1.603073
iteration 3401 / 5000: loss 1.650218
iteration 3501 / 5000: loss 1.576033
iteration 3601 / 5000: loss 1.583862
iteration 3701 / 5000: loss 1.640827
iteration 3801 / 5000: loss 1.561770
iteration 3901 / 5000: loss 1.506129
iteration 4001 / 5000: loss 1.635286
iteration 4101 / 5000: loss 1.565186
iteration 4201 / 5000: loss 1.652229
iteration 4301 / 5000: loss 1.712574
iteration 4401 / 5000: loss 1.530746
iteration 4501 / 5000: loss 1.451661
iteration 4601 / 5000: loss 1.600384
iteration 4701 / 5000: loss 1.590342
iteration 4801 / 5000: loss 1.544466
iteration 4901 / 5000: loss 1.576054
hidden_layer_size: 50, lr: 1.000000e-03, reg: 8.000000e-01, train_acc: 0.527694, val_acc: 0.496000
iteration 1 / 5000: loss 2.442707
iteration 101 / 5000: loss 1.937146
iteration 201 / 5000: loss 1.815597
iteration 301 / 5000: loss 1.765431
iteration 401 / 5000: loss 1.700788
iteration 501 / 5000: loss 1.679822
iteration 601 / 5000: loss 1.764963
iteration 701 / 5000: loss 1.636598
iteration 801 / 5000: loss 1.673494
iteration 901 / 5000: loss 1.534676
iteration 1001 / 5000: loss 1.613225
iteration 1101 / 5000: loss 1.733771
iteration 1201 / 5000: loss 1.726583
iteration 1301 / 5000: loss 1.586051
iteration 1401 / 5000: loss 1.656604
iteration 1501 / 5000: loss 1.534998
iteration 1601 / 5000: loss 1.679995
iteration 1701 / 5000: loss 1.552681
iteration 1801 / 5000: loss 1.733442
iteration 1901 / 5000: loss 1.617446
iteration 2001 / 5000: loss 1.626044
iteration 2101 / 5000: loss 1.551818
iteration 2201 / 5000: loss 1.470854
iteration 2301 / 5000: loss 1.661086
iteration 2401 / 5000: loss 1.577598
iteration 2501 / 5000: loss 1.546606
iteration 2601 / 5000: loss 1.561523
iteration 2701 / 5000: loss 1.469055
iteration 2801 / 5000: loss 1.648482
iteration 2901 / 5000: loss 1.641542
iteration 3001 / 5000: loss 1.614564
iteration 3101 / 5000: loss 1.584410
iteration 3201 / 5000: loss 1.588124
iteration 3301 / 5000: loss 1.681532
iteration 3401 / 5000: loss 1.615463
iteration 3501 / 5000: loss 1.584807
iteration 3601 / 5000: loss 1.603297
iteration 3701 / 5000: loss 1.724373
iteration 3801 / 5000: loss 1.591655
iteration 3901 / 5000: loss 1.702534
iteration 4001 / 5000: loss 1.623340
iteration 4101 / 5000: loss 1.641294
iteration 4201 / 5000: loss 1.635042
iteration 4301 / 5000: loss 1.673128
iteration 4401 / 5000: loss 1.682140
iteration 4501 / 5000: loss 1.449161
iteration 4601 / 5000: loss 1.525111
iteration 4701 / 5000: loss 1.530529
iteration 4801 / 5000: loss 1.569994
iteration 4901 / 5000: loss 1.574141
hidden_layer_size: 50, lr: 1.000000e-03, reg: 9.000000e-01, train_acc: 0.517122, val_acc: 0.496000
iteration 1 / 5000: loss 2.456708
iteration 101 / 5000: loss 1.896568
iteration 201 / 5000: loss 1.965177
iteration 301 / 5000: loss 1.785737
iteration 401 / 5000: loss 1.873589
iteration 501 / 5000: loss 1.683588
iteration 601 / 5000: loss 1.735273
iteration 701 / 5000: loss 1.556873
iteration 801 / 5000: loss 1.671576
iteration 901 / 5000: loss 1.675479
iteration 1001 / 5000: loss 1.471181
iteration 1101 / 5000: loss 1.700672
iteration 1201 / 5000: loss 1.671824
iteration 1301 / 5000: loss 1.694444
iteration 1401 / 5000: loss 1.547663
iteration 1501 / 5000: loss 1.646269
iteration 1601 / 5000: loss 1.638506
iteration 1701 / 5000: loss 1.623645
iteration 1801 / 5000: loss 1.585266
iteration 1901 / 5000: loss 1.620095
iteration 2001 / 5000: loss 1.686716
iteration 2101 / 5000: loss 1.747354
iteration 2201 / 5000: loss 1.792917
iteration 2301 / 5000: loss 1.599611
iteration 2401 / 5000: loss 1.595990
iteration 2501 / 5000: loss 1.489155
iteration 2601 / 5000: loss 1.591050
iteration 2701 / 5000: loss 1.603958
iteration 2801 / 5000: loss 1.606772
iteration 2901 / 5000: loss 1.634377
iteration 3001 / 5000: loss 1.589244
iteration 3101 / 5000: loss 1.650553
iteration 3201 / 5000: loss 1.686622
iteration 3301 / 5000: loss 1.570713
iteration 3401 / 5000: loss 1.649472
iteration 3501 / 5000: loss 1.607949
iteration 3601 / 5000: loss 1.601935
iteration 3701 / 5000: loss 1.697594
iteration 3801 / 5000: loss 1.547300
iteration 3901 / 5000: loss 1.733764
iteration 4001 / 5000: loss 1.576811
iteration 4101 / 5000: loss 1.677788
iteration 4201 / 5000: loss 1.686394
iteration 4301 / 5000: loss 1.681963
iteration 4401 / 5000: loss 1.499082
iteration 4501 / 5000: loss 1.595599
iteration 4601 / 5000: loss 1.554476
iteration 4701 / 5000: loss 1.489696
iteration 4801 / 5000: loss 1.696233
iteration 4901 / 5000: loss 1.677301
hidden_layer_size: 50, lr: 1.000000e-03, reg: 1.000000e+00, train_acc: 0.520612, val_acc: 0.472000
iteration 1 / 5000: loss 2.411576
iteration 101 / 5000: loss 1.836156
iteration 201 / 5000: loss 1.885800
iteration 301 / 5000: loss 1.893541
iteration 401 / 5000: loss 1.844543
iteration 501 / 5000: loss 1.809642
iteration 601 / 5000: loss 1.657675
iteration 701 / 5000: loss 1.780945
iteration 801 / 5000: loss 1.601904
iteration 901 / 5000: loss 1.706981
iteration 1001 / 5000: loss 1.979295
iteration 1101 / 5000: loss 1.751661
iteration 1201 / 5000: loss 1.847542
iteration 1301 / 5000: loss 1.618903
iteration 1401 / 5000: loss 1.765442
iteration 1501 / 5000: loss 1.753777
iteration 1601 / 5000: loss 1.637120
iteration 1701 / 5000: loss 1.785420
iteration 1801 / 5000: loss 1.653437
iteration 1901 / 5000: loss 1.630546
iteration 2001 / 5000: loss 1.709852
iteration 2101 / 5000: loss 1.664592
iteration 2201 / 5000: loss 1.682222
iteration 2301 / 5000: loss 1.654162
iteration 2401 / 5000: loss 1.686332
iteration 2501 / 5000: loss 1.668635
iteration 2601 / 5000: loss 1.597970
iteration 2701 / 5000: loss 1.636637
iteration 2801 / 5000: loss 1.716199
iteration 2901 / 5000: loss 1.690445
iteration 3001 / 5000: loss 1.691788
iteration 3101 / 5000: loss 1.625525
iteration 3201 / 5000: loss 1.589346
iteration 3301 / 5000: loss 1.723794
iteration 3401 / 5000: loss 1.549846
iteration 3501 / 5000: loss 1.510103
iteration 3601 / 5000: loss 1.698321
iteration 3701 / 5000: loss 1.548404
iteration 3801 / 5000: loss 1.663732
iteration 3901 / 5000: loss 1.621197
iteration 4001 / 5000: loss 1.578743
iteration 4101 / 5000: loss 1.630511
iteration 4201 / 5000: loss 1.765611
iteration 4301 / 5000: loss 1.462404
iteration 4401 / 5000: loss 1.696718
iteration 4501 / 5000: loss 1.513799
iteration 4601 / 5000: loss 1.560826
iteration 4701 / 5000: loss 1.564488
iteration 4801 / 5000: loss 1.531402
iteration 4901 / 5000: loss 1.761563
hidden_layer_size: 50, lr: 3.000000e-03, reg: 7.000000e-01, train_acc: 0.518082, val_acc: 0.511000
iteration 1 / 5000: loss 2.422039
iteration 101 / 5000: loss 1.813170
iteration 201 / 5000: loss 1.715150
iteration 301 / 5000: loss 1.841663
iteration 401 / 5000: loss 1.744498
iteration 501 / 5000: loss 1.957277
iteration 601 / 5000: loss 1.639745
iteration 701 / 5000: loss 1.949596
iteration 801 / 5000: loss 1.731423
iteration 901 / 5000: loss 1.869320
iteration 1001 / 5000: loss 1.769447
iteration 1101 / 5000: loss 1.705494
iteration 1201 / 5000: loss 1.691169
iteration 1301 / 5000: loss 1.641915
iteration 1401 / 5000: loss 1.882545
iteration 1501 / 5000: loss 1.602276
iteration 1601 / 5000: loss 1.633667
iteration 1701 / 5000: loss 1.775711
iteration 1801 / 5000: loss 1.687100
iteration 1901 / 5000: loss 1.759698
iteration 2001 / 5000: loss 1.492900
iteration 2101 / 5000: loss 1.765351
iteration 2201 / 5000: loss 1.636466
iteration 2301 / 5000: loss 1.638094
iteration 2401 / 5000: loss 1.716062
iteration 2501 / 5000: loss 1.716907
iteration 2601 / 5000: loss 1.639567
iteration 2701 / 5000: loss 1.677390
iteration 2801 / 5000: loss 1.637646
iteration 2901 / 5000: loss 1.690357
iteration 3001 / 5000: loss 1.692102
iteration 3101 / 5000: loss 1.615892
iteration 3201 / 5000: loss 1.634743
iteration 3301 / 5000: loss 1.611996
iteration 3401 / 5000: loss 1.741204
iteration 3501 / 5000: loss 1.522721
iteration 3601 / 5000: loss 1.635729
iteration 3701 / 5000: loss 1.594717
iteration 3801 / 5000: loss 1.655441
iteration 3901 / 5000: loss 1.693226
iteration 4001 / 5000: loss 1.596081
iteration 4101 / 5000: loss 1.652881
iteration 4201 / 5000: loss 1.635261
iteration 4301 / 5000: loss 1.625867
iteration 4401 / 5000: loss 1.620940
iteration 4501 / 5000: loss 1.665441
iteration 4601 / 5000: loss 1.638988
iteration 4701 / 5000: loss 1.672426
iteration 4801 / 5000: loss 1.552613
iteration 4901 / 5000: loss 1.530297
hidden_layer_size: 50, lr: 3.000000e-03, reg: 8.000000e-01, train_acc: 0.516796, val_acc: 0.491000
iteration 1 / 5000: loss 2.440985
iteration 101 / 5000: loss 1.837533
iteration 201 / 5000: loss 1.787138
iteration 301 / 5000: loss 2.088714
iteration 401 / 5000: loss 2.037268
iteration 501 / 5000: loss 1.827009
iteration 601 / 5000: loss 1.823490
iteration 701 / 5000: loss 1.760280
iteration 801 / 5000: loss 1.723934
iteration 901 / 5000: loss 1.710038
iteration 1001 / 5000: loss 1.744794
iteration 1101 / 5000: loss 1.754833
iteration 1201 / 5000: loss 1.781314
iteration 1301 / 5000: loss 1.631397
iteration 1401 / 5000: loss 1.755319
iteration 1501 / 5000: loss 1.883567
iteration 1601 / 5000: loss 1.663161
iteration 1701 / 5000: loss 1.632734
iteration 1801 / 5000: loss 1.799940
iteration 1901 / 5000: loss 1.711048
iteration 2001 / 5000: loss 1.580237
iteration 2101 / 5000: loss 1.699291
iteration 2201 / 5000: loss 1.651983
iteration 2301 / 5000: loss 1.758508
iteration 2401 / 5000: loss 1.776575
iteration 2501 / 5000: loss 1.845505
iteration 2601 / 5000: loss 1.715325
iteration 2701 / 5000: loss 1.668014
iteration 2801 / 5000: loss 1.744954
iteration 2901 / 5000: loss 1.803629
iteration 3001 / 5000: loss 1.680518
iteration 3101 / 5000: loss 1.809398
iteration 3201 / 5000: loss 1.740159
iteration 3301 / 5000: loss 1.544197
iteration 3401 / 5000: loss 1.554805
iteration 3501 / 5000: loss 1.705149
iteration 3601 / 5000: loss 1.691910
iteration 3701 / 5000: loss 1.652844
iteration 3801 / 5000: loss 1.580678
iteration 3901 / 5000: loss 1.690269
iteration 4001 / 5000: loss 1.672617
iteration 4101 / 5000: loss 1.611390
iteration 4201 / 5000: loss 1.625859
iteration 4301 / 5000: loss 1.542807
iteration 4401 / 5000: loss 1.655772
iteration 4501 / 5000: loss 1.594376
iteration 4601 / 5000: loss 1.667600
iteration 4701 / 5000: loss 1.565942
iteration 4801 / 5000: loss 1.672305
iteration 4901 / 5000: loss 1.493342
hidden_layer_size: 50, lr: 3.000000e-03, reg: 9.000000e-01, train_acc: 0.505898, val_acc: 0.487000
iteration 1 / 5000: loss 2.456797
iteration 101 / 5000: loss 1.892806
iteration 201 / 5000: loss 1.875345
iteration 301 / 5000: loss 1.734931
iteration 401 / 5000: loss 1.842772
iteration 501 / 5000: loss 1.926247
iteration 601 / 5000: loss 1.714435
iteration 701 / 5000: loss 1.814758
iteration 801 / 5000: loss 1.879241
iteration 901 / 5000: loss 1.780312
iteration 1001 / 5000: loss 1.981747
iteration 1101 / 5000: loss 1.807644
iteration 1201 / 5000: loss 1.800673
iteration 1301 / 5000: loss 1.689968
iteration 1401 / 5000: loss 1.852438
iteration 1501 / 5000: loss 1.660521
iteration 1601 / 5000: loss 1.636540
iteration 1701 / 5000: loss 1.687910
iteration 1801 / 5000: loss 1.714870
iteration 1901 / 5000: loss 1.805553
iteration 2001 / 5000: loss 1.642245
iteration 2101 / 5000: loss 1.780806
iteration 2201 / 5000: loss 1.850754
iteration 2301 / 5000: loss 1.685324
iteration 2401 / 5000: loss 1.897079
iteration 2501 / 5000: loss 1.597512
iteration 2601 / 5000: loss 1.614858
iteration 2701 / 5000: loss 1.699866
iteration 2801 / 5000: loss 1.777578
iteration 2901 / 5000: loss 1.667910
iteration 3001 / 5000: loss 1.707668
iteration 3101 / 5000: loss 1.805310
iteration 3201 / 5000: loss 1.640255
iteration 3301 / 5000: loss 1.815660
iteration 3401 / 5000: loss 1.677882
iteration 3501 / 5000: loss 1.697069
iteration 3601 / 5000: loss 1.677437
iteration 3701 / 5000: loss 1.651822
iteration 3801 / 5000: loss 1.526709
iteration 3901 / 5000: loss 1.713597
iteration 4001 / 5000: loss 1.560245
iteration 4101 / 5000: loss 1.570371
iteration 4201 / 5000: loss 1.623433
iteration 4301 / 5000: loss 1.589593
iteration 4401 / 5000: loss 1.700008
iteration 4501 / 5000: loss 1.610730
iteration 4601 / 5000: loss 1.647724
iteration 4701 / 5000: loss 1.635629
iteration 4801 / 5000: loss 1.684117
iteration 4901 / 5000: loss 1.735978
hidden_layer_size: 50, lr: 3.000000e-03, reg: 1.000000e+00, train_acc: 0.496857, val_acc: 0.477000
best val_acc: 0.511000
hidden_layer_size: 50, lr: 3.000000e-04, reg: 7.000000e-01, train_acc: 0.500429, val_acc: 0.485000
hidden_layer_size: 50, lr: 3.000000e-04, reg: 8.000000e-01, train_acc: 0.500510, val_acc: 0.482000
hidden_layer_size: 50, lr: 3.000000e-04, reg: 9.000000e-01, train_acc: 0.494612, val_acc: 0.486000
hidden_layer_size: 50, lr: 3.000000e-04, reg: 1.000000e+00, train_acc: 0.494184, val_acc: 0.487000
hidden_layer_size: 50, lr: 9.000000e-04, reg: 7.000000e-01, train_acc: 0.533082, val_acc: 0.495000
hidden_layer_size: 50, lr: 9.000000e-04, reg: 8.000000e-01, train_acc: 0.529490, val_acc: 0.499000
hidden_layer_size: 50, lr: 9.000000e-04, reg: 9.000000e-01, train_acc: 0.523714, val_acc: 0.489000
hidden_layer_size: 50, lr: 9.000000e-04, reg: 1.000000e+00, train_acc: 0.514755, val_acc: 0.487000
hidden_layer_size: 50, lr: 1.000000e-03, reg: 7.000000e-01, train_acc: 0.530490, val_acc: 0.506000
hidden_layer_size: 50, lr: 1.000000e-03, reg: 8.000000e-01, train_acc: 0.527694, val_acc: 0.496000
hidden_layer_size: 50, lr: 1.000000e-03, reg: 9.000000e-01, train_acc: 0.517122, val_acc: 0.496000
hidden_layer_size: 50, lr: 1.000000e-03, reg: 1.000000e+00, train_acc: 0.520612, val_acc: 0.472000
hidden_layer_size: 50, lr: 3.000000e-03, reg: 7.000000e-01, train_acc: 0.518082, val_acc: 0.511000
hidden_layer_size: 50, lr: 3.000000e-03, reg: 8.000000e-01, train_acc: 0.516796, val_acc: 0.491000
hidden_layer_size: 50, lr: 3.000000e-03, reg: 9.000000e-01, train_acc: 0.505898, val_acc: 0.487000
hidden_layer_size: 50, lr: 3.000000e-03, reg: 1.000000e+00, train_acc: 0.496857, val_acc: 0.477000
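
For reference, the grid above sweeps a fixed hidden layer of 50 units over learning rates {3e-4, 9e-4, 1e-3, 3e-3} and regularization strengths {0.7, 0.8, 0.9, 1.0}, training each combination for 5000 iterations; the best validation accuracy is 0.511 (lr 3e-3, reg 0.7). A minimal sketch of a driver loop that would produce this kind of log is given below. It assumes that X_train/y_train/X_val/y_val are already loaded and that a TwoLayerNet class with train() and predict() methods is available; those names, the import path, and the method signatures are illustrative assumptions, not taken from the log.

    import itertools
    import numpy as np

    # Assumed to exist in the surrounding script (hypothetical names, not shown in the log):
    #   X_train, y_train, X_val, y_val -- training and validation splits
    #   TwoLayerNet(input_size, hidden_size, num_classes) -- a two-layer fully connected
    #       classifier exposing train(...) and predict(...) methods
    from two_layer_net import TwoLayerNet  # assumed import path

    hidden_sizes   = [50]
    learning_rates = [3e-4, 9e-4, 1e-3, 3e-3]
    regs           = [0.7, 0.8, 0.9, 1.0]
    num_iters      = 5000

    results = {}            # (hidden, lr, reg) -> (train_acc, val_acc)
    best_val_acc = -1.0
    best_net = None

    for hidden, lr, reg in itertools.product(hidden_sizes, learning_rates, regs):
        net = TwoLayerNet(X_train.shape[1], hidden, 10)
        # verbose=True is what would emit the "iteration k / 5000: loss ..." lines above
        net.train(X_train, y_train, X_val, y_val,
                  num_iters=num_iters, learning_rate=lr, reg=reg, verbose=True)

        train_acc = np.mean(net.predict(X_train) == y_train)
        val_acc   = np.mean(net.predict(X_val) == y_val)
        results[(hidden, lr, reg)] = (train_acc, val_acc)
        print('hidden_layer_size: %d, lr: %e, reg: %e, train_acc: %f, val_acc: %f'
              % (hidden, lr, reg, train_acc, val_acc))

        # keep the model with the highest validation accuracy
        if val_acc > best_val_acc:
            best_val_acc, best_net = val_acc, net

    print('best val_acc: %f' % best_val_acc)
    for (hidden, lr, reg), (train_acc, val_acc) in sorted(results.items()):
        print('hidden_layer_size: %d, lr: %e, reg: %e, train_acc: %f, val_acc: %f'
              % (hidden, lr, reg, train_acc, val_acc))

Selecting on validation accuracy rather than training accuracy is what makes the (lr 3e-3, reg 0.7) run the winner here, even though the (lr 9e-4, reg 0.7) run reaches a higher train_acc (0.533 vs 0.518).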