In [39]:
import os
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")

# Load the benchmark results from <cwd>/output/output.csv.
# os.getcwd() has no trailing separator, so the path is assembled with
# os.path.join instead of plain string concatenation.
df = pd.read_csv(os.path.join(os.getcwd(), 'output', 'output.csv'))

# Derived throughput column: (n * d * k / 1e9) divided by the measured time.
df['gflops'] = (df['n'] * df['d'] * df['k'] / 1E9) / df['time']

# NOTE(review): this adds arithmetic intensity to time, which does not look
# like a speed-up; the column is not used by the plotting cells visible here.
# Kept unchanged -- confirm the intended formula.
df['time_speedup'] = df['arithmetic intensity'] + df['time']

# Distinct values of each benchmark parameter, in order of first appearance.
algorithms = df['algorithm'].unique().tolist()
Ns = df['n'].unique().tolist()
Ds = df['d'].unique().tolist()
Ks = df['k'].unique().tolist()
In [40]:
# bar plots for every algorithm at every combination of N, D, and K
# TIME
for N, D, K in itertools.product(Ns, Ds, Ks):
    # Rows measured for this exact (n, d, k) configuration.
    subset = df[(df['n'] == N) & (df['d'] == D) & (df['k'] == K)]

    # Use the imported pyplot module directly (the `sns.plt` alias is not
    # available in current seaborn) and a fresh figure per configuration.
    fig, ax = plt.subplots()
    sns.barplot(x="algorithm", y="time", data=subset, ax=ax)
    ax.set_title('Time Performance of K-means Algorithms: n=%s, d=%s, k=%s' % (N, D, K))
    ax.set_xlabel('Algorithm Type')
    ax.set_ylabel('Time (s)')
    fig.savefig(os.getcwd() + '/plots/time-bar-%s-%s-%s.png' % (N, D, K))
    # Close explicitly so figures do not accumulate across iterations.
    plt.close(fig)
# GFLOP/S
# bar plots of GFlop/s for every algorithm at every combination of N, D, and K
for N, D, K in itertools.product(Ns, Ds, Ks):
    # Rows measured for this exact (n, d, k) configuration.
    subset = df[(df['n'] == N) & (df['d'] == D) & (df['k'] == K)]

    # Use the imported pyplot module directly (the `sns.plt` alias is not
    # available in current seaborn) and a fresh figure per configuration.
    fig, ax = plt.subplots()
    sns.barplot(x="algorithm", y="gflops", data=subset, ax=ax)
    ax.set_title('GFlop/s Performance of K-means Algorithms: n=%s, d=%s, k=%s' % (N, D, K))
    ax.set_xlabel('Algorithm Type')
    # The y axis shows the 'gflops' column, not time.
    ax.set_ylabel('GFlop/s')
    fig.savefig(os.getcwd() + '/plots/gflops-bar-%s-%s-%s.png' % (N, D, K))
    # Close explicitly so figures do not accumulate across iterations.
    plt.close(fig)
In [41]:
### TIME line plots
# line plots for every algorithm for N vs. time at every combination of D and K
for D, K in itertools.product(Ds, Ks):
    df_n = df[(df['d'] == D) & (df['k'] == K)]
    sns_plot = sns.FacetGrid(df_n, hue="algorithm", size=6, legend_out=True)
    sns_plot = sns_plot.map(plt.plot, "n", "time", marker='o')
    # Pin the tick positions to the measured N values before labelling them;
    # setting labels alone attaches them to whatever ticks were auto-chosen.
    sns_plot = sns_plot.set(xticks=Ns, xticklabels=[str(n) for n in Ns])
    sns_plot = sns_plot.add_legend()
    plt.title('Time Performance of K-means Algorithms by N: d=%s, k=%s' % (D, K))
    plt.xlabel('Dataset N')
    plt.ylabel('Time (s)')
    sns_plot.savefig(os.getcwd() + '/plots/time-line-n-%s-%s.png' % (D, K))
    plt.close()
# line plots for every algorithm for D vs. time at every combination of N and K
for N, K in itertools.product(Ns, Ks):
    df_n = df[(df['n'] == N) & (df['k'] == K)]
    sns_plot = sns.FacetGrid(df_n, hue="algorithm", size=6, legend_out=True)
    sns_plot = sns_plot.map(plt.plot, "d", "time", marker='o')
    # Pin the tick positions to the measured D values before labelling them;
    # setting labels alone attaches them to whatever ticks were auto-chosen.
    sns_plot = sns_plot.set(xticks=Ds, xticklabels=[str(d) for d in Ds])
    sns_plot = sns_plot.add_legend()
    plt.title('Time Performance of K-means Algorithms by D: n=%s, k=%s' % (N, K))
    # The x axis is the 'd' column here, not N.
    plt.xlabel('Dataset D')
    plt.ylabel('Time (s)')
    sns_plot.savefig(os.getcwd() + '/plots/time-line-d-%s-%s.png' % (N, K))
    plt.close()
# line plots for every algorithm for K vs. time at every combination of N and D
for N, D in itertools.product(Ns, Ds):
    df_n = df[(df['n'] == N) & (df['d'] == D)]
    sns_plot = sns.FacetGrid(df_n, hue="algorithm", size=6, legend_out=True)
    sns_plot = sns_plot.map(plt.plot, "k", "time", marker='o')
    # Pin the tick positions to the measured K values before labelling them;
    # setting labels alone attaches them to whatever ticks were auto-chosen.
    sns_plot = sns_plot.set(xticks=Ks, xticklabels=[str(k) for k in Ks])
    sns_plot = sns_plot.add_legend()
    plt.title('Time Performance of K-means Algorithms by K: n=%s, d=%s' % (N, D))
    # The x axis is the 'k' column here, not N.
    plt.xlabel('Clusters K')
    plt.ylabel('Time (s)')
    sns_plot.savefig(os.getcwd() + '/plots/time-line-k-%s-%s.png' % (N, D))
    plt.close()
In [42]:
### GFlop/s line plots
# line plots for every algorithm for N vs. GFlop/s at every combination of D and K
for D, K in itertools.product(Ds, Ks):
    df_n = df[(df['d'] == D) & (df['k'] == K)]
    sns_plot = sns.FacetGrid(df_n, hue="algorithm", size=6, legend_out=True)
    sns_plot = sns_plot.map(plt.plot, "n", "gflops", marker='o')
    # Pin the tick positions to the measured N values before labelling them;
    # setting labels alone attaches them to whatever ticks were auto-chosen.
    sns_plot = sns_plot.set(xticks=Ns, xticklabels=[str(n) for n in Ns])
    sns_plot = sns_plot.add_legend()
    plt.title('GFlop/s Performance of K-means Algorithms by N: d=%s, k=%s' % (D, K))
    plt.xlabel('Dataset N')
    plt.ylabel('GFlop/s')
    sns_plot.savefig(os.getcwd() + '/plots/gflops-line-n-%s-%s.png' % (D, K))
    plt.close()
# line plots for every algorithm for D vs. GFlop/s at every combination of N and K
for N, K in itertools.product(Ns, Ks):
    df_n = df[(df['n'] == N) & (df['k'] == K)]
    sns_plot = sns.FacetGrid(df_n, hue="algorithm", size=6, legend_out=True)
    sns_plot = sns_plot.map(plt.plot, "d", "gflops", marker='o')
    # Pin the tick positions to the measured D values before labelling them;
    # setting labels alone attaches them to whatever ticks were auto-chosen.
    sns_plot = sns_plot.set(xticks=Ds, xticklabels=[str(d) for d in Ds])
    sns_plot = sns_plot.add_legend()
    plt.title('GFlop/s Performance of K-means Algorithms by D: n=%s, k=%s' % (N, K))
    # The x axis is the 'd' column here, not N.
    plt.xlabel('Dataset D')
    plt.ylabel('GFlop/s')
    sns_plot.savefig(os.getcwd() + '/plots/gflops-line-d-%s-%s.png' % (N, K))
    plt.close()
# line plots for every algorithm for K vs. GFlop/s at every combination of N and D
for N, D in itertools.product(Ns, Ds):
    df_n = df[(df['n'] == N) & (df['d'] == D)]
    sns_plot = sns.FacetGrid(df_n, hue="algorithm", size=6, legend_out=True)
    sns_plot = sns_plot.map(plt.plot, "k", "gflops", marker='o')
    # Pin the tick positions to the measured K values before labelling them;
    # setting labels alone attaches them to whatever ticks were auto-chosen.
    sns_plot = sns_plot.set(xticks=Ks, xticklabels=[str(k) for k in Ks])
    sns_plot = sns_plot.add_legend()
    plt.title('GFlop/s Performance of K-means Algorithms by K: n=%s, d=%s' % (N, D))
    # The x axis is the 'k' column here, not N.
    plt.xlabel('Clusters K')
    plt.ylabel('GFlop/s')
    sns_plot.savefig(os.getcwd() + '/plots/gflops-line-k-%s-%s.png' % (N, D))
    plt.close()
In [51]:
### TIME line plots as speed-up relative to the sequential algorithm
# Attach to every row the 'sequential' time measured for the same (n, d, k).
# The column is reset first so the cell gives the same result when re-run, and
# configurations without a sequential measurement stay NaN instead of raising.
df['seq_time'] = np.nan
is_sequential = df['algorithm'] == 'sequential'
for D, K, N in itertools.product(Ds, Ks, Ns):
    same_config = (df['n'] == N) & (df['d'] == D) & (df['k'] == K)
    seq_times = df.loc[same_config & is_sequential, 'time']
    if seq_times.empty:
        continue
    # .loc replaces the removed .ix indexer; the first sequential row is used.
    df.loc[same_config, 'seq_time'] = seq_times.iloc[0]
df['seqtime_speedup'] = df['seq_time'] / df['time']

# line plots for every algorithm for N vs. speed-up at every combination of D and K
for D, K in itertools.product(Ds, Ks):
    # The 'sequential' baseline and the 'stock' algorithm are left out of these plots.
    df_n = df[(df['d'] == D) & (df['k'] == K) & (df['algorithm'] != 'sequential') & (df['algorithm'] != 'stock')]
    sns_plot = sns.FacetGrid(df_n, hue="algorithm", size=6, legend_out=True)
    sns_plot = sns_plot.map(plt.plot, "n", "seqtime_speedup", marker='o')
    # Pin the tick positions to the measured N values before labelling them;
    # setting labels alone attaches them to whatever ticks were auto-chosen.
    sns_plot = sns_plot.set(xticks=Ns, xticklabels=[str(n) for n in Ns])
    sns_plot = sns_plot.add_legend()
    plt.title('Speedup of Parallel K-means Algorithms by N: d=%s, k=%s' % (D, K))
    plt.xlabel('Dataset N')
    plt.ylabel('Speedup Compared to Sequential Algorithm')
    sns_plot.savefig(os.getcwd() + '/plots/speedup-line-n-%s-%s.png' % (D, K))
    plt.close()
In [56]:
### Time relative to sequential
# Each row's time as a percentage of the sequential time for the same
# configuration; relies on the 'seq_time' column computed in the speed-up cell.
df['frac_seq'] = (df['time'] / df['seq_time']) * 100

# line plots for N vs. time relative to sequential at every combination of D and K
for D, K in itertools.product(Ds, Ks):
    df_n = df[(df['d'] == D) & (df['k'] == K)]
    sns_plot = sns.FacetGrid(df_n, hue="algorithm", size=6, legend_out=True)
    sns_plot = sns_plot.map(plt.plot, "n", "frac_seq", marker='o')
    # Pin the tick positions to the measured N values before labelling them;
    # setting labels alone attaches them to whatever ticks were auto-chosen.
    sns_plot = sns_plot.set(xticks=Ns, xticklabels=[str(n) for n in Ns])
    sns_plot = sns_plot.add_legend()
    plt.title('Time fraction relative to sequential by N: d=%s, k=%s' % (D, K))
    plt.xlabel('Dataset N')
    plt.ylabel('Percent')
    sns_plot.savefig(os.getcwd() + '/plots/percent_seq-line-n-%s-%s.png' % (D, K))
    plt.close()
# line plots for every algorithm for D vs. time relative to sequential at every combination of N and K
for N, K in itertools.product(Ns, Ks):
    df_n = df[(df['n'] == N) & (df['k'] == K)]
    sns_plot = sns.FacetGrid(df_n, hue="algorithm", size=6, legend_out=True)
    sns_plot = sns_plot.map(plt.plot, "d", "frac_seq", marker='o')
    # Pin the tick positions to the measured D values before labelling them;
    # setting labels alone attaches them to whatever ticks were auto-chosen.
    sns_plot = sns_plot.set(xticks=Ds, xticklabels=[str(d) for d in Ds])
    sns_plot = sns_plot.add_legend()
    plt.title('Time fraction relative to sequential by D: n=%s, k=%s' % (N, K))
    # The x axis is the 'd' column here, not N.
    plt.xlabel('Dataset D')
    plt.ylabel('Percent')
    sns_plot.savefig(os.getcwd() + '/plots/percent_seq-line-d-%s-%s.png' % (N, K))
    plt.close()
# line plots for every algorithm for K vs. time relative to sequential at every combination of N and D
for N, D in itertools.product(Ns, Ds):
    df_n = df[(df['n'] == N) & (df['d'] == D)]
    sns_plot = sns.FacetGrid(df_n, hue="algorithm", size=6, legend_out=True)
    sns_plot = sns_plot.map(plt.plot, "k", "frac_seq", marker='o')
    # Pin the tick positions to the measured K values before labelling them;
    # setting labels alone attaches them to whatever ticks were auto-chosen.
    sns_plot = sns_plot.set(xticks=Ks, xticklabels=[str(k) for k in Ks])
    sns_plot = sns_plot.add_legend()
    plt.title('Time fraction relative to sequential by K: n=%s, d=%s' % (N, D))
    # The x axis is the 'k' column here, not N.
    plt.xlabel('Clusters K')
    plt.ylabel('Percent')
    sns_plot.savefig(os.getcwd() + '/plots/percent_seq-line-k-%s-%s.png' % (N, D))
    plt.close()
In [ ]:
In [ ]: