In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
sns.set_style("whitegrid")

In [4]:
from MyML.helper.plotting import save_fig

In [5]:
folder = "/home/chiroptera/QCThesis/experiments/study kmin/"
filename = "results_kmin_100k.csv"

In [6]:
res = pd.read_csv(folder + filename)
for col in res.columns:
    print(col)


Unnamed: 0
n_samples
rule
kmin
kmax
t_ensemble
biggest_cluster
type_mat
t_build
n_assocs
max_assoc
t_sl
accuracy
round

In [7]:
# drop incomplete rows (no n_samples recorded)
res = res[res.n_samples.notnull()]
# max_assoc is stored as a stringified tuple; keep only its first element as an int
res['max_assoc'] = res.max_assoc.apply(lambda s: int(s.strip('()').split(',')[0]))
# fraction of the full n x n co-association matrix that is actually stored
res['sparsity'] = res.n_assocs * 1.0 / (res.n_samples ** 2)
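
The max_assoc column comes out of the CSV as a stringified tuple, which is why it is stripped and split above. A minimal illustration of the parsing, assuming the stored value looks like "(count, index)" (the exact tuple contents are an assumption):

# hypothetical raw cell value; only the first element (the count) is kept
raw = "(102, 37)"
max_assoc_value = int(raw.strip('()').split(',')[0])   # -> 102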

In [8]:
# average the metrics over rounds for each (rule, n_samples) pair
by_rule_n = res.groupby(by=["rule","n_samples"])
rule_n_mean = by_rule_n.mean()
rule_n_mean = rule_n_mean[['kmin','kmax','t_ensemble','biggest_cluster','t_build','max_assoc','sparsity']]
rule_n_mean = rule_n_mean.reset_index()

In [9]:
rule_n_mean


Out[9]:
rule n_samples kmin kmax t_ensemble biggest_cluster t_build max_assoc sparsity
0 2sqrt 500 24 46 0.449143 103.0 0.003593 102.0 0.175640
1 2sqrt 1000 32 64 0.614484 163.0 0.007613 179.0 0.140068
2 2sqrt 5000 72 142 1.653651 244.2 0.193588 553.0 0.064680
3 2sqrt 10000 100 200 3.284222 357.6 0.679371 825.0 0.048116
4 2sqrt 25000 160 318 9.912761 514.6 2.944074 1290.4 0.029989
5 2sqrt 50000 224 448 25.169035 631.6 8.518460 1645.0 0.020584
6 2sqrt 100000 318 634 65.957036 885.8 25.149985 2302.6 0.014636
7 sk=300,th=30% 500 2 3 0.256407 333.0 0.015492 499.0 0.906096
8 sk=300,th=30% 1000 4 5 0.333140 654.0 0.036897 722.2 0.569142
9 sk=300,th=30% 5000 17 22 0.601839 1353.6 0.685861 1352.6 0.211443
10 sk=300,th=30% 10000 34 44 1.275257 1695.6 1.757256 1708.0 0.156986
11 sk=300,th=30% 25000 84 109 4.562427 1136.4 6.138902 2757.2 0.065943
12 sk=300,th=30% 50000 167 217 15.033801 1053.8 14.082020 2673.4 0.033254
13 sk=300,th=30% 100000 334 434 53.708112 946.0 30.026679 2467.4 0.016464
14 sk=sqrt/2,th=30% 500 46 60 0.554201 73.2 0.004065 85.4 0.116782
15 sk=sqrt/2,th=30% 1000 67 87 0.795285 60.4 0.007561 133.4 0.080113
16 sk=sqrt/2,th=30% 5000 143 186 2.325367 123.2 0.126228 327.4 0.038199
17 sk=sqrt/2,th=30% 10000 200 260 4.727282 185.0 0.458040 432.0 0.027395
18 sk=sqrt/2,th=30% 25000 317 412 14.629016 246.0 1.921412 657.2 0.017175
19 sk=sqrt/2,th=30% 50000 451 586 37.425763 358.8 5.502441 917.2 0.012107
20 sk=sqrt/2,th=30% 100000 633 823 101.051155 486.4 16.198583 1288.2 0.008585
21 sqrt 500 12 23 0.365377 153.8 0.020901 167.2 0.268864
22 sqrt 1000 16 32 0.461489 244.2 0.010415 243.2 0.199118
23 sqrt 5000 36 71 1.009844 762.0 0.344361 869.2 0.126275
24 sqrt 10000 50 100 1.929875 836.4 1.146886 1643.8 0.094650
25 sqrt 25000 80 159 5.414094 1369.2 5.308476 2668.4 0.059584
26 sqrt 50000 112 224 13.590840 1474.0 15.921081 3479.8 0.041310
27 sqrt 100000 159 317 34.770353 2030.4 48.424562 4843.4 0.029708
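
As a quick sanity check on the sparsity column (defined above as n_assocs / n_samples**2), the implied average number of stored associations can be recovered from this table; e.g. for the 2sqrt rule at 100000 samples, 0.014636 * 1e10 is roughly 1.5e8 associations. A short sketch, assuming rule_n_mean is the frame computed above:

# implied number of associations per rule at n = 100000
at_100k = rule_n_mean[rule_n_mean.n_samples == 100000]
print(at_100k.sparsity * at_100k.n_samples ** 2)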

In [46]:
fig1 = plt.figure(figsize=(16,6))
sns.set_palette(sns.color_palette("deep", 6))

# left panel: all rules, n >= 1e3
ax = fig1.add_subplot(121)
for key,grp in rule_n_mean.groupby(by="rule"):
    idx = grp.n_samples >= 1e3
    ax.plot(grp.n_samples[idx], grp.sparsity[idx], label=key)

ax.set_xlabel("# samples")
ax.set_ylabel("sparsity")
ax.set_title("Sparsity evolution with several rules (n >= 10^3)")
ax.legend(loc="best")
ax.grid(True, which="both")
ax.set_xscale("log")

    
# right panel: same curves, zoomed in on n >= 1e4
ax = fig1.add_subplot(122)
for key,grp in rule_n_mean.groupby(by="rule"):
    idx = grp.n_samples >= 1e4
    ax.plot(grp.n_samples[idx], grp.sparsity[idx], label=key)

ax.set_xlabel("# samples")
ax.set_ylabel("sparsity")
ax.set_title("Sparsity evolution with several rules (n >= 10^4)")
ax.legend(loc="best")
ax.grid(True, which="both")
ax.set_xscale("log")



In [47]:
fig2 = plt.figure(figsize=(16,6))
sns.set_palette(sns.color_palette("deep", 6))

ax = fig2.add_subplot(121)
for key,grp in rule_n_mean.groupby(by="rule"):
    idx = grp.n_samples >= 1e3
    ax.plot(grp.n_samples[idx], grp.max_assoc[idx], label=key)

ax.set_xlabel("# samples")
ax.set_ylabel("max. # assocs.")
ax.set_title("Max. # assocs. evolution with several rules")
ax.legend(loc="best")
ax.grid(True, which="both")
ax.set_xscale("log")



In [48]:
fig3 = plt.figure(figsize=(16,6))
sns.set_palette(sns.color_palette("deep", 6))

ax = fig3.add_subplot(121)
for key,grp in rule_n_mean.groupby(by="rule"):
    idx = grp.n_samples >= 1e3
    ax.plot(grp.n_samples[idx], grp.t_ensemble[idx], label=key)

ax.set_xlabel("# samples")
ax.set_ylabel("time to generate ensemble [s]")
ax.set_title("Time to generate ensembles")
ax.legend(loc="best")
ax.grid(True, which="both")
ax.set_xscale("log")
#ax.set_yscale("log")
    
ax = fig3.add_subplot(122)
for key,grp in rule_n_mean.groupby(by="rule"):
    idx = grp.n_samples >= 1e3
    ax.plot(grp.n_samples[idx], grp.t_build[idx], label=key)

ax.set_xlabel("# samples")
ax.set_ylabel("time to build matrix [s]")
ax.set_title("Time to build matrices")
ax.legend(loc="best")
ax.grid(True, which="both")
ax.set_xscale("log")



In [41]:
# sparsity (left axis) and samples per cluster (right axis) on twin y axes
fig4 = plt.figure(figsize=(16,6))

sns.set_palette(sns.color_palette("dark", 10))
ax = fig4.add_subplot(121)
sns.set_palette(sns.color_palette("deep", 10))
ax_s = ax.twinx()
for key,grp in rule_n_mean.groupby(by="rule"):
    idx = grp.n_samples >= 1e3
    ax.plot(grp.n_samples[idx], grp.sparsity[idx], label=key)
    ax_s.plot(grp.n_samples[idx], grp.n_samples[idx] / grp.kmin[idx], label=key)

ax_s.set_ylabel("# samples / Kmin")
#ax_s.legend(loc=(1.1, 0.8), title="max_assocs")


ax.set_xlabel("# samples")
ax.set_ylabel("sparsity")
ax.set_title("Sparsity and samples per cluster evolution")
#ax.legend(loc=(1.1, 0.0), title="sparsity")
ax.grid(True, which="both")
ax.set_xscale("log")

sns.set_palette(sns.color_palette("dark", 10))
ax = fig4.add_subplot(122)
sns.set_palette(sns.color_palette("deep", 10))
ax_s = ax.twinx()
for key,grp in rule_n_mean.groupby(by="rule"):
    idx = grp.n_samples >= 1e4
    ax.plot(grp.n_samples[idx], grp.sparsity[idx], label=key)    
    ax_s.plot(grp.n_samples[idx], grp.n_samples[idx] / grp.kmin[idx], label=key)

ax_s.set_ylabel("# samples / Kmin")    
ax_s.legend(loc=(1.1, 0.8), title="samples / kmin")

ax.set_xlabel("# samples")
ax.set_ylabel("sparsity")
ax.set_title("Sparsity and samples per cluster evolution")
ax.legend(loc=(1.1, 0.0), title="sparsity")
ax.grid(True, which="both")
ax.set_xscale("log")



In [42]:
# interleave the dark and deep palettes: dark[0], deep[0], dark[1], deep[1], ...
dark_deep_palette = list()
for (col1,col2) in zip(sns.color_palette("dark", 6), sns.color_palette("deep", 6)):
    dark_deep_palette.append(col1)
    dark_deep_palette.append(col2)

sns.palplot(dark_deep_palette)
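
The interleaving puts dark[i] right before deep[i], so that in the next figure the two lines drawn for each rule (max. assocs, then biggest cluster) take adjacent, related colors from the cycle. An equivalent construction with itertools, shown only as an alternative:

from itertools import chain
dark_deep_palette = list(chain.from_iterable(zip(sns.color_palette("dark", 6), sns.color_palette("deep", 6))))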



In [43]:
# max. # assocs and biggest cluster size per rule, drawn on the same axis with paired dark/deep colors
fig5 = plt.figure(figsize=(18,6))

sns.set_palette(dark_deep_palette, n_colors=len(dark_deep_palette))
ax = fig5.add_subplot(121)
for key,grp in rule_n_mean.groupby(by="rule"):
    idx = grp.n_samples >= 1e3
    ax.plot(grp.n_samples[idx], grp.max_assoc[idx], label=key + " max assoc")
    ax.plot(grp.n_samples[idx], grp.biggest_cluster[idx], label=key + " clust")

ax_s.set_ylabel("biggest cluster")    
ax_s.legend(loc=(1.1, 0.8), title="biggest cluster")

ax.set_xlabel("# samples")
ax.set_ylabel("max # assocs / biggest cluster size")
ax.set_title("Max. num. assocs and biggest cluster size evolution per rule")
ax.legend(loc="upper left")#(1.1, 0.0), title="Legend")
ax.grid(True, which="both")
ax.set_xscale("log")



In [44]:
# ratio of max. # assocs to biggest cluster size per rule
fig6 = plt.figure(figsize=(18,6))

sns.set_palette(sns.color_palette("deep", 6))
ax = fig6.add_subplot(121)
for key,grp in rule_n_mean.groupby(by="rule"):
    idx = grp.n_samples >= 1e3
    ax.plot(grp.n_samples[idx], grp.max_assoc[idx] / grp.biggest_cluster[idx], label=key)

ax.set_xlabel("# samples")
ax.set_ylabel("max # assocs / biggest cluster size")
ax.set_title("Relationship between max. num. assocs and biggest cluster size per rule")
ax.legend(loc="lower right")#loc=(1.1, 0.0))
ax.grid(True, which="both")
ax.set_xscale("log")



In [49]:
save_fig(fig1,folder + "sparsity")
save_fig(fig2,folder + "max_assocs")
save_fig(fig3,folder + "times")
save_fig(fig4,folder + "sparsity_kmin")
save_fig(fig5,folder + "max_assoc_cluster_size")
save_fig(fig6,folder + "max_assoc_div_cluster_size")
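
save_fig comes from the local MyML package. If that helper is not available, a minimal stand-in under the assumption that it simply writes the figure to disk (format and DPI here are guesses, not MyML's actual defaults):

# hypothetical fallback with the same call signature
def save_fig(fig, path, fmt="png", dpi=200):
    fig.savefig(path + "." + fmt, dpi=dpi, bbox_inches="tight")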