In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# Select seaborn's "whitegrid" style for the plots in this notebook.
sns.set_style("whitegrid")
In [4]:
from MyML.helper.plotting import save_fig
In [5]:
# Prefix used both for reading the results CSV and for writing the figures
# saved at the end of the notebook.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
folder = "/home/chiroptera/QCThesis/experiments/study kmin/"
# Name of the results file, appended to `folder` when loading.
filename = "results_kmin_100k.csv"
In [6]:
# Load the experiment results and list the column names that are available.
res = pd.read_csv(folder + filename)
for col in res.columns:
    # Parenthesised call form: prints the same text on Python 2 and Python 3.
    print(col)
In [7]:
# Keep only the rows that have a sample count. The explicit copy makes `res`
# an independent frame, so the column assignments below do not write into a
# slice of the loaded data.
res = res[res.n_samples.notnull()].copy()

# max_assoc is parsed as tuple-like text: strip the parentheses and keep the
# first comma-separated field as an int. Going through str() lets this cell be
# re-run after the column has already been converted to integers.
res['max_assoc'] = res.max_assoc.apply(lambda s: int(str(s).strip('()').split(',')[0]))

# Number of associations relative to n_samples squared.
res['sparsity'] = res.n_assocs * 1.0 / (res.n_samples ** 2)
In [8]:
# Average the measurements of interest for every (rule, n_samples) pair.
# Only the listed columns are aggregated, so columns that cannot be averaged
# never reach the mean computation.
summary_cols = ['kmin','kmax','t_ensemble','biggest_cluster','t_build','max_assoc','sparsity']
by_rule_n = res.groupby(by=["rule","n_samples"])
rule_n_mean = by_rule_n[summary_cols].mean()
# Turn the (rule, n_samples) group keys back into ordinary columns.
rule_n_mean = rule_n_mean.reset_index()
In [9]:
# Show the per-(rule, n_samples) mean table as the cell output.
rule_n_mean
Out[9]:
In [46]:
# Figure 1: mean sparsity against number of samples, one line per rule.
# The left panel keeps n_samples >= 1e3, the right panel n_samples >= 1e4.
fig1 = plt.figure(figsize=(16,6))
sns.set_palette(sns.color_palette("deep", 6))

for position, min_samples in ((121, 1e3), (122, 1e4)):
    ax = fig1.add_subplot(position)
    for rule, rule_rows in rule_n_mean.groupby(by="rule"):
        shown = rule_rows[rule_rows.n_samples >= min_samples]
        ax.plot(shown.n_samples, shown.sparsity, label=rule)
    ax.set_xlabel("# samples")
    ax.set_ylabel("sparsity")
    ax.set_title("Sparsity evolution with several rules")
    ax.legend(loc="best")
    ax.grid(True, which="both")
    ax.set_xscale("log")
In [47]:
# Figure 2: mean max_assoc against number of samples (n_samples >= 1e3),
# one line per rule, drawn in the left half of the figure.
fig2 = plt.figure(figsize=(16,6))
sns.set_palette(sns.color_palette("deep", 6))
ax = fig2.add_subplot(1, 2, 1)

for rule, rule_rows in rule_n_mean.groupby(by="rule"):
    shown = rule_rows[rule_rows.n_samples >= 1e3]
    ax.plot(shown.n_samples, shown.max_assoc, label=rule)

ax.set_title("Max. # assocs. evolution with several rules")
ax.set_xlabel("# samples")
ax.set_ylabel("max. # assocs.")
ax.legend(loc="best")
ax.grid(True, which="both")
ax.set_xscale("log")
In [48]:
# Figure 3: mean timings against number of samples (n_samples >= 1e3), one
# line per rule. Left panel plots t_ensemble, right panel plots t_build.
fig3 = plt.figure(figsize=(16,6))
sns.set_palette(sns.color_palette("deep", 6))

# (subplot position, column to plot, y-axis label, panel title)
timing_panels = (
    (121, "t_ensemble", "time to generate ensemble [s]", "Time to generate ensembles"),
    (122, "t_build", "time to build matrix [s]", "Time to build matrices"),
)

for position, column, y_label, heading in timing_panels:
    ax = fig3.add_subplot(position)
    for rule, rule_rows in rule_n_mean.groupby(by="rule"):
        shown = rule_rows[rule_rows.n_samples >= 1e3]
        ax.plot(shown.n_samples, shown[column], label=rule)
    ax.set_xlabel("# samples")
    ax.set_ylabel(y_label)
    ax.set_title(heading)
    ax.legend(loc="best")
    ax.grid(True, which="both")
    ax.set_xscale("log")
In [41]:
# Figure 4 (double y axis): sparsity on the left y axis and n_samples / kmin
# on the right y axis, one line per rule on each axis.
# The left panel keeps n_samples >= 1e3 and has no legends; the right panel
# keeps n_samples >= 1e4 and carries both legends, placed outside the axes.
fig4 = plt.figure(figsize=(16,6))

for position, min_samples, with_legends in ((121, 1e3, False), (122, 1e4, True)):
    # The palette is switched right before each axes is created — presumably
    # so the sparsity lines use "dark" colours and the twin-axis lines use the
    # matching "deep" ones.
    sns.set_palette(sns.color_palette("dark", 10))
    ax = fig4.add_subplot(position)
    sns.set_palette(sns.color_palette("deep", 10))
    ax_s = ax.twinx()

    for key, grp in rule_n_mean.groupby(by="rule"):
        idx = grp.n_samples >= min_samples
        ax.plot(grp.n_samples[idx], grp.sparsity[idx], label=key)
        ax_s.plot(grp.n_samples[idx], grp.n_samples[idx] / grp.kmin[idx], label=key)

    ax_s.set_ylabel("# samples / Kmin")
    ax.set_xlabel("# samples")
    ax.set_ylabel("sparsity")
    ax.set_title("Sparsity and samples per cluster evolution")

    if with_legends:
        # The twin axis plots n_samples / kmin, so its legend title names that
        # quantity (it used to read "max_assocs", which is not plotted here).
        ax_s.legend(loc=(1.1, 0.8), title="# samples / Kmin")
        ax.legend(loc=(1.1, 0.0), title="sparsity")

    ax.grid(True, which="both")
    ax.set_xscale("log")
In [42]:
# Interleave the "dark" and "deep" palettes: dark[0], deep[0], dark[1], deep[1], ...
dark_deep_palette = [
    shade
    for pair in zip(sns.color_palette("dark", 6), sns.color_palette("deep", 6))
    for shade in pair
]
# Preview the combined palette.
sns.palplot(dark_deep_palette)
In [43]:
# Figure 5: mean max_assoc and mean biggest_cluster against number of samples
# (n_samples >= 1e3), both on the same y axis, two lines per rule.
# dark_deep_palette alternates "dark" and "deep" colours and each rule plots
# max_assoc first and biggest_cluster second, so a rule's two lines take two
# consecutive palette entries.
fig5 = plt.figure(figsize=(18,6))
sns.set_palette(dark_deep_palette, n_colors=len(dark_deep_palette))
ax = fig5.add_subplot(121)

for key, grp in rule_n_mean.groupby(by="rule"):
    idx = grp.n_samples >= 1e3
    ax.plot(grp.n_samples[idx], grp.max_assoc[idx], label=key + " max assoc")
    ax.plot(grp.n_samples[idx], grp.biggest_cluster[idx], label=key + " clust")

# No twin axis is created in this cell, so only `ax` is configured. Touching an
# `ax_s` here would relabel the twin axis of an earlier figure (or fail with a
# NameError on a fresh kernel).
ax.set_xlabel("# samples")
ax.set_ylabel("max # assocs / biggest cluster size")
ax.set_title("Max. num. assocs and biggest cluster size evolution per rule")
ax.legend(loc="upper left")
ax.grid(True, which="both")
ax.set_xscale("log")
In [44]:
# Figure 6: ratio of mean max_assoc to mean biggest_cluster against number of
# samples (n_samples >= 1e3), one line per rule, on a single y axis.
fig6 = plt.figure(figsize=(18,6))
sns.set_palette(sns.color_palette("deep", 6))
ax = fig6.add_subplot(1, 2, 1)

for rule, rule_rows in rule_n_mean.groupby(by="rule"):
    shown = rule_rows[rule_rows.n_samples >= 1e3]
    ratio = shown.max_assoc / shown.biggest_cluster
    ax.plot(shown.n_samples, ratio, label=rule)

ax.set_title("Relationship between max. num. assocs and biggest cluster size per rule")
ax.set_xlabel("# samples")
ax.set_ylabel("max # assocs / biggest cluster size")
ax.legend(loc="lower right")
ax.grid(True, which="both")
ax.set_xscale("log")
In [49]:
# Save every figure under `folder`, pairing each figure with its output name.
figure_outputs = [
    (fig1, "sparsity"),
    (fig2, "max_assocs"),
    (fig3, "times"),
    (fig4, "sparsity_kmin"),
    (fig5, "max_assoc_cluster_size"),
    (fig6, "max_assoc_div_cluster_size"),
]
for figure, stem in figure_outputs:
    save_fig(figure, folder + stem)