In [39]:
import os
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")

# Load the benchmark results from <cwd>/output/output.csv.
# os.path.join supplies the separator between the working directory and
# 'output', which plain string concatenation silently dropped.
df = pd.read_csv(os.path.join(os.getcwd(), 'output', 'output.csv'))

# Derived metric: (n * d * k) / 1e9, divided by the 'time' column.
df['gflops'] = (df['n'] * df['d'] * df['k'] / 1E9) / df['time']
# NOTE(review): summing 'arithmetic intensity' and 'time' looks like a
# placeholder rather than a speed-up formula -- confirm the intended definition.
df['time_speedup'] = df['arithmetic intensity'] + df['time']

# Distinct values of each experiment parameter, in order of first appearance.
algorithms = df['algorithm'].unique().tolist()
Ns = df['n'].unique().tolist()
Ds = df['d'].unique().tolist()
Ks = df['k'].unique().tolist()

In [40]:
# bar plots for every algorithm at every combination of N, D, and K

def save_bar_plots(metric, title_label, y_label):
    """Save one per-algorithm bar chart of `metric` for every (n, d, k) combination.

    metric      -- column of `df` drawn on the y axis; also used as the
                   file-name prefix ('<metric>-bar-<n>-<d>-<k>.png')
    title_label -- leading word(s) of the plot title
    y_label     -- label for the y axis
    """
    for N, D, K in itertools.product(Ns, Ds, Ks):
        subset = df[(df['n'] == N) & (df['d'] == D) & (df['k'] == K)]
        # A fresh figure per chart, drawn through explicit fig/ax handles
        # instead of the removed `sns.plt` alias and the pyplot state machine.
        fig, ax = plt.subplots()
        sns.barplot(x="algorithm", y=metric, data=subset, ax=ax)
        ax.set_title('%s Performance of K-means Algorithms: n=%s, d=%s, k=%s'
                     % (title_label, N, D, K))
        ax.set_xlabel('Algorithm Type')
        ax.set_ylabel(y_label)
        fig.savefig(os.getcwd() + '/plots/%s-bar-%s-%s-%s.png' % (metric, N, D, K))
        # Close explicitly so figures do not pile up across the loop.
        plt.close(fig)

# TIME
save_bar_plots('time', 'Time', 'Time (s)')

# GFLOP/S (the y axis was previously mislabelled 'Time (s)')
save_bar_plots('gflops', 'GFlop/s', 'GFlop/s')

In [41]:
### TIME line plots

def save_time_line_plots(x_col, x_values, x_label, col_a, values_a, col_b, values_b):
    """Save line plots of time against `x_col`, one line per algorithm.

    One figure is written for every combination of the two parameters that
    are held fixed.

    x_col            -- column of `df` on the x axis ('n', 'd' or 'k')
    x_values         -- values of `x_col` used as tick positions and labels
    x_label          -- label for the x axis
    col_a, values_a  -- first fixed column and the values to iterate over
    col_b, values_b  -- second fixed column and the values to iterate over
    """
    for a, b in itertools.product(values_a, values_b):
        subset = df[(df[col_a] == a) & (df[col_b] == b)]
        # NOTE(review): newer seaborn releases name this argument `height`
        # instead of `size` -- confirm against the installed version.
        grid = sns.FacetGrid(subset, hue="algorithm", size=6, legend_out=True)
        grid.map(plt.plot, x_col, "time", marker='o')
        # Pin the tick positions before labelling them; labels alone would be
        # attached to whatever ticks matplotlib picked automatically.
        grid.set(xticks=x_values)
        grid.set(xticklabels=[str(v) for v in x_values])
        grid.add_legend()
        plt.title('Time Performance of K-means Algorithms by %s: %s=%s, %s=%s'
                  % (x_col.upper(), col_a, a, col_b, b))
        plt.xlabel(x_label)
        plt.ylabel('Time (s)')
        grid.savefig(os.getcwd() + '/plots/time-line-%s-%s-%s.png' % (x_col, a, b))
        plt.close()

# line plots for every algorithm for N vs. time at every combination of D and K
save_time_line_plots('n', Ns, 'Dataset N', 'd', Ds, 'k', Ks)

# line plots for every algorithm for D vs. time at every combination of N and K
save_time_line_plots('d', Ds, 'Dimensions D', 'n', Ns, 'k', Ks)

# line plots for every algorithm for K vs. time at every combination of N and D
save_time_line_plots('k', Ks, 'Clusters K', 'n', Ns, 'd', Ds)

In [42]:
### GFlop/s line plots

def save_gflops_line_plots(x_col, x_values, x_label, col_a, values_a, col_b, values_b):
    """Save line plots of GFlop/s against `x_col`, one line per algorithm.

    One figure is written for every combination of the two parameters that
    are held fixed.

    x_col            -- column of `df` on the x axis ('n', 'd' or 'k')
    x_values         -- values of `x_col` used as tick positions and labels
    x_label          -- label for the x axis
    col_a, values_a  -- first fixed column and the values to iterate over
    col_b, values_b  -- second fixed column and the values to iterate over
    """
    for a, b in itertools.product(values_a, values_b):
        subset = df[(df[col_a] == a) & (df[col_b] == b)]
        # NOTE(review): newer seaborn releases name this argument `height`
        # instead of `size` -- confirm against the installed version.
        grid = sns.FacetGrid(subset, hue="algorithm", size=6, legend_out=True)
        grid.map(plt.plot, x_col, "gflops", marker='o')
        # Pin the tick positions before labelling them; labels alone would be
        # attached to whatever ticks matplotlib picked automatically.
        grid.set(xticks=x_values)
        grid.set(xticklabels=[str(v) for v in x_values])
        grid.add_legend()
        plt.title('GFlop/s Performance of K-means Algorithms by %s: %s=%s, %s=%s'
                  % (x_col.upper(), col_a, a, col_b, b))
        plt.xlabel(x_label)
        plt.ylabel('GFlop/s')
        grid.savefig(os.getcwd() + '/plots/gflops-line-%s-%s-%s.png' % (x_col, a, b))
        plt.close()

# line plots for every algorithm for N vs. GFlop/s at every combination of D and K
save_gflops_line_plots('n', Ns, 'Dataset N', 'd', Ds, 'k', Ks)

# line plots for every algorithm for D vs. GFlop/s at every combination of N and K
save_gflops_line_plots('d', Ds, 'Dimensions D', 'n', Ns, 'k', Ks)

# line plots for every algorithm for K vs. GFlop/s at every combination of N and D
save_gflops_line_plots('k', Ks, 'Clusters K', 'n', Ns, 'd', Ds)

In [51]:
### TIME line plots as % of sequential time ("speed-up"?)

# Attach the sequential baseline time to every row of the same (n, d, k)
# configuration. Non-sequential times are masked to NaN, then the first
# remaining value in each group is broadcast to the whole group. A
# configuration with no sequential run gets NaN instead of raising, which the
# previous per-combination `.iloc[0]` lookup did; this also drops the removed
# `DataFrame.ix` indexer.
sequential_only = df['time'].where(df['algorithm'] == 'sequential')
df['seq_time'] = sequential_only.groupby([df['n'], df['d'], df['k']]).transform('first')
df['seqtime_speedup'] = df['seq_time'] / df['time']

# line plots for every algorithm for N vs. speed-up at every combination of D and K
# (the 'sequential' baseline and the 'stock' algorithm are left out)
for D, K in itertools.product(Ds, Ks):
    compared = df[(df['d'] == D) & (df['k'] == K)
                  & ~df['algorithm'].isin(['sequential', 'stock'])]
    # NOTE(review): newer seaborn releases name this argument `height`
    # instead of `size` -- confirm against the installed version.
    grid = sns.FacetGrid(compared, hue="algorithm", size=6, legend_out=True)
    grid.map(plt.plot, "n", "seqtime_speedup", marker='o')
    # Pin the tick positions before labelling them; labels alone would be
    # attached to whatever ticks matplotlib picked automatically.
    grid.set(xticks=Ns)
    grid.set(xticklabels=[str(n) for n in Ns])
    grid.add_legend()
    plt.title('Speedup of Parallel K-means Algorithms by N: d=%s, k=%s' % (D, K))
    plt.xlabel('Dataset N')
    plt.ylabel('Speedup Compared to Sequential Algorithm')
    grid.savefig(os.getcwd() + '/plots/speedup-line-n-%s-%s.png' % (D, K))
    plt.close()

In [56]:
### Time relative to sequential

# Each run's time as a percentage of the sequential time stored in 'seq_time'.
df['frac_seq'] = (df['time'] / df['seq_time']) * 100

def save_frac_seq_line_plots(x_col, x_values, x_label, col_a, values_a, col_b, values_b):
    """Save line plots of 'frac_seq' against `x_col`, one line per algorithm.

    One figure is written for every combination of the two parameters that
    are held fixed.

    x_col            -- column of `df` on the x axis ('n', 'd' or 'k')
    x_values         -- values of `x_col` used as tick positions and labels
    x_label          -- label for the x axis
    col_a, values_a  -- first fixed column and the values to iterate over
    col_b, values_b  -- second fixed column and the values to iterate over
    """
    for a, b in itertools.product(values_a, values_b):
        subset = df[(df[col_a] == a) & (df[col_b] == b)]
        # NOTE(review): newer seaborn releases name this argument `height`
        # instead of `size` -- confirm against the installed version.
        grid = sns.FacetGrid(subset, hue="algorithm", size=6, legend_out=True)
        grid.map(plt.plot, x_col, "frac_seq", marker='o')
        # Pin the tick positions before labelling them; labels alone would be
        # attached to whatever ticks matplotlib picked automatically.
        grid.set(xticks=x_values)
        grid.set(xticklabels=[str(v) for v in x_values])
        grid.add_legend()
        plt.title('Time fraction relative to sequential by %s: %s=%s, %s=%s'
                  % (x_col.upper(), col_a, a, col_b, b))
        plt.xlabel(x_label)
        plt.ylabel('Percent')
        grid.savefig(os.getcwd() + '/plots/percent_seq-line-%s-%s-%s.png' % (x_col, a, b))
        plt.close()

# line plots for every algorithm for N vs. percent of sequential time at every combination of D and K
save_frac_seq_line_plots('n', Ns, 'Dataset N', 'd', Ds, 'k', Ks)

# line plots for every algorithm for D vs. percent of sequential time at every combination of N and K
save_frac_seq_line_plots('d', Ds, 'Dimensions D', 'n', Ns, 'k', Ks)

# line plots for every algorithm for K vs. percent of sequential time at every combination of N and D
save_frac_seq_line_plots('k', Ks, 'Clusters K', 'n', Ns, 'd', Ds)

In [ ]:


In [ ]: