In [35]:
import pickle
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

In [36]:
df = pickle.load(open("2015-12-12-mlpexperiments_results5.p", "rb"))
df.shape


Out[36]:
(50, 20)
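
Each of the 50 rows appears to be one training run (five runs at each of ten training sizes, as the later group-by shows), and the 20 columns hold the run configuration plus an overall and a per-class confusion matrix. A quick way to orient yourself is to list the columns; the expected output below is reproduced from the frame shown in the next cell:

df.columns.tolist()
# ['cm_overall', 'epochs', 'network', 'pct_white', 'test_size', 'training_size',
#  'p_cm', 'P_cm', 'r_cm', 'R_cm', 'n_cm', 'N_cm', 'b_cm', 'B_cm',
#  'q_cm', 'Q_cm', 'k_cm', 'K_cm', 'white_cm', 'black_cm']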

In [37]:
df[df['training_size'] == 500000]


Out[37]:
cm_overall epochs network pct_white test_size training_size p_cm P_cm r_cm R_cm n_cm N_cm b_cm B_cm q_cm Q_cm k_cm K_cm white_cm black_cm
1 [[3807, 559], [1165, 4469]] 5 {"layers": [{"b_constraint": null, "name": "De... 0.5028 10000 500000 [[504, 99], [70, 500]] [[462, 174], [32, 591]] [[314, 113], [77, 368]] [[342, 136], [58, 466]] [[268, 37], [49, 267]] [[293, 110], [13, 381]] [[259, 47], [85, 217]] [[276, 111], [25, 367]] [[301, 67], [62, 287]] [[295, 121], [27, 443]] [[229, 59], [39, 245]] [[264, 91], [22, 337]] [[1932, 743], [177, 2585]] [[1875, 422], [382, 1884]]
1 [[3601, 369], [1371, 4659]] 5 {"layers": [{"b_constraint": null, "name": "De... 0.5028 10000 500000 [[464, 139], [20, 550]] [[480, 156], [37, 586]] [[259, 168], [38, 407]] [[336, 142], [58, 466]] [[231, 74], [36, 280]] [[295, 108], [12, 382]] [[222, 84], [34, 268]] [[291, 96], [37, 355]] [[273, 95], [22, 327]] [[302, 114], [37, 433]] [[202, 86], [16, 268]] [[246, 109], [22, 337]] [[1950, 725], [203, 2559]] [[1651, 646], [166, 2100]]
1 [[3854, 567], [1118, 4461]] 5 {"layers": [{"b_constraint": null, "name": "De... 0.5028 10000 500000 [[519, 84], [57, 513]] [[486, 150], [35, 588]] [[304, 123], [61, 384]] [[359, 119], [76, 448]] [[275, 30], [52, 264]] [[297, 106], [19, 375]] [[256, 50], [62, 240]] [[290, 97], [46, 346]] [[291, 77], [45, 304]] [[301, 115], [43, 427]] [[223, 65], [32, 252]] [[253, 102], [39, 320]] [[1986, 689], [258, 2504]] [[1868, 429], [309, 1957]]
1 [[3945, 492], [1027, 4536]] 5 {"layers": [{"b_constraint": null, "name": "De... 0.5028 10000 500000 [[509, 94], [38, 532]] [[512, 124], [53, 570]] [[307, 120], [54, 391]] [[373, 105], [70, 454]] [[256, 49], [37, 279]] [[329, 74], [14, 380]] [[246, 60], [44, 258]] [[308, 79], [42, 350]] [[291, 77], [39, 310]] [[329, 87], [42, 428]] [[219, 69], [30, 254]] [[266, 89], [29, 330]] [[2117, 558], [250, 2512]] [[1828, 469], [242, 2024]]
1 [[3811, 762], [1161, 4266]] 5 {"layers": [{"b_constraint": null, "name": "De... 0.5028 10000 500000 [[532, 71], [90, 480]] [[479, 157], [33, 590]] [[311, 116], [106, 339]] [[340, 138], [47, 477]] [[269, 36], [113, 203]] [[282, 121], [21, 373]] [[259, 47], [111, 191]] [[279, 108], [37, 355]] [[304, 64], [97, 252]] [[290, 126], [33, 437]] [[219, 69], [49, 235]] [[247, 108], [25, 334]] [[1917, 758], [196, 2566]] [[1894, 403], [566, 1700]]

In [38]:
# helpers to decode the serialized network JSON
import json

def get_num_layers(json_str):
    # the serialized layer list has 5 entries for the input and output layers
    # plus 3 entries per hidden layer; invert that to recover the layer count
    # (counting input and output). Integer division keeps the result an int.
    return (len(json.loads(json_str)['layers']) - 5) // 3 + 2

def get_first_activation(json_str):
    # activation attached to the first Dense layer (second entry in the serialized list)
    return json.loads(json_str)['layers'][1]['activation']

def get_first_width(json_str):
    # output_dim of the first layer entry, used as the network width
    return json.loads(json_str)['layers'][0]['output_dim']
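
As a quick sanity check, here is a sketch applying these helpers to a pared-down, hypothetical version of the serialized network JSON. Only the keys the helpers actually read are filled in; the layer composition and the 'relu' value are assumptions, and the real Keras serialization carries many more fields, as the truncated network column above shows.

# hypothetical minimal layer list: 5 entries for input/output plus 3 per hidden layer,
# so 8 entries correspond to a 3-layer network (input, one hidden, output)
toy_net = json.dumps({"layers": [
    {"name": "Dense", "output_dim": 512},          # first Dense layer -> width
    {"name": "Activation", "activation": "relu"},  # first activation (value assumed)
    {"name": "Dropout"},
    {"name": "Dense"},
    {"name": "Activation"},
    {"name": "Dropout"},
    {"name": "Dense"},
    {"name": "Activation"},
]})

print(get_num_layers(toy_net))        # 3
print(get_first_width(toy_net))       # 512
print(get_first_activation(toy_net))  # relu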

In [39]:
# convert a 2x2 confusion matrix into accuracy: correct predictions / all predictions
def cm2accuracy(cm):
    return float(cm[0][0] + cm[1][1]) / sum([sum(row) for row in cm])

df['overall_acc'] = df['cm_overall'].apply(cm2accuracy)
df['width'] = df['network'].apply(get_first_width)
df['num_layers'] = df['network'].apply(get_num_layers)
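
As a quick check against the output above, the first cm_overall listed for training_size = 500000 works out as follows:

# (3807 + 4469) correct out of 3807 + 559 + 1165 + 4469 = 10000 predictions
cm2accuracy([[3807, 559], [1165, 4469]])  # 0.8276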

In [40]:
df.head()


Out[40]:
cm_overall epochs network pct_white test_size training_size p_cm P_cm r_cm R_cm ... B_cm q_cm Q_cm k_cm K_cm white_cm black_cm overall_acc width num_layers
1 [[3149, 1421], [1823, 3607]] 5 {"layers": [{"b_constraint": null, "name": "De... 0.5028 10000 50000 [[356, 247], [39, 531]] [[471, 165], [106, 517]] [[186, 241], [118, 327]] [[355, 123], [258, 266]] ... [[287, 100], [168, 224]] [[199, 169], [55, 294]] [[302, 114], [172, 298]] [[138, 150], [86, 198]] [[231, 124], [145, 214]] [[1937, 738], [982, 1780]] [[1212, 1085], [439, 1827]] 0.6756 512 3
1 [[2792, 1101], [2180, 3927]] 5 {"layers": [{"b_constraint": null, "name": "De... 0.5028 10000 50000 [[376, 227], [56, 514]] [[361, 275], [65, 558]] [[185, 242], [116, 329]] [[293, 185], [176, 348]] ... [[236, 151], [107, 285]] [[185, 183], [62, 287]] [[246, 170], [111, 359]] [[140, 148], [72, 212]] [[197, 158], [94, 265]] [[1563, 1112], [640, 2122]] [[1229, 1068], [461, 1805]] 0.6719 512 3
1 [[2889, 1284], [2083, 3744]] 5 {"layers": [{"b_constraint": null, "name": "De... 0.5028 10000 50000 [[442, 161], [78, 492]] [[300, 336], [48, 575]] [[250, 177], [198, 247]] [[275, 203], [154, 370]] ... [[202, 185], [61, 331]] [[253, 115], [112, 237]] [[208, 208], [95, 375]] [[161, 127], [103, 181]] [[181, 174], [97, 262]] [[1366, 1309], [518, 2244]] [[1523, 774], [766, 1500]] 0.6633 512 3
1 [[3104, 1441], [1868, 3587]] 5 {"layers": [{"b_constraint": null, "name": "De... 0.5028 10000 50000 [[345, 258], [34, 536]] [[483, 153], [137, 486]] [[195, 232], [134, 311]] [[320, 158], [215, 309]] ... [[294, 93], [199, 193]] [[200, 168], [67, 282]] [[300, 116], [177, 293]] [[142, 146], [81, 203]] [[208, 147], [114, 245]] [[1914, 761], [1005, 1757]] [[1190, 1107], [436, 1830]] 0.6691 512 3
1 [[2547, 1102], [2425, 3926]] 5 {"layers": [{"b_constraint": null, "name": "De... 0.5028 10000 50000 [[458, 145], [87, 483]] [[203, 433], [31, 592]] [[255, 172], [218, 227]] [[203, 275], [89, 435]] ... [[122, 265], [31, 361]] [[269, 99], [116, 233]] [[146, 270], [38, 432]] [[174, 114], [106, 178]] [[155, 200], [67, 292]] [[958, 1717], [283, 2479]] [[1589, 708], [819, 1447]] 0.6473 512 3

5 rows × 23 columns


In [41]:
x = df['training_size']
y = df['overall_acc']
plt.scatter(x,y)
plt.xlabel('training count')
plt.ylabel('accuracy')
plt.ylim((0.0,1.0))


Out[41]:
(0.0, 1.0)

In [42]:
acc_mean = df.groupby('training_size')['overall_acc'].apply(np.mean)
acc_std = df.groupby('training_size')['overall_acc'].apply(np.std)
print(acc_std)
print(acc_mean)


training_size
50000     0.009920
100000    0.010654
150000    0.009423
200000    0.010007
250000    0.037423
300000    0.003605
350000    0.011068
400000    0.016781
450000    0.032245
500000    0.012902
Name: overall_acc, dtype: float64
training_size
50000     0.66544
100000    0.69374
150000    0.72022
200000    0.73286
250000    0.71084
300000    0.79136
350000    0.78848
400000    0.80994
450000    0.80134
500000    0.82818
Name: overall_acc, dtype: float64
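
The same summary can be produced in a single pass with agg. One caveat: np.std computes the population standard deviation (ddof=0), whereas pandas' own .std() defaults to the sample standard deviation (ddof=1), so the NumPy functions are passed explicitly here to keep the numbers identical to the output above. The acc_summary name is just for illustration.

# equivalent one-liner: mean and (population) std of accuracy per training size
acc_summary = df.groupby('training_size')['overall_acc'].agg([np.mean, np.std])
print(acc_summary)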

In [50]:
# plot it!
fig, ax = plt.subplots(1, figsize=(12,8))
ax.plot(list(acc_mean.index), acc_mean, lw=2, color='blue')
ax.fill_between(list(acc_mean.index), acc_mean+acc_std, acc_mean-acc_std, facecolor='blue', alpha=0.2)
ax.set_xlabel('Training Size', fontsize=20)
ax.set_ylabel("Accuracy", fontsize=20)
ax.set_title("Training Size vs. Accuracy", fontsize=30)
plt.setp(ax.get_xticklabels(), fontsize=13)
plt.setp(ax.get_yticklabels(), fontsize=15)


Out[50]:
[None, None, None, None, None]
