Data Analysis for AM 205 Project

Wesley Chen, Brandon Sim, Andy Shi


In [1]:
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline
import pandas as pd
import itertools
import seaborn

plt.rcParams['figure.figsize'] = (10, 8)

In [2]:
# Read the first results file and tag every row with Example = 1 so the
# rows stay identifiable once the frames are combined.
data1 = pd.read_csv("data1.csv").assign(Example=1)
data1.head()


Out[2]:
n_procs numSteps qualityfactor corrections serial_max parallel_max serial_mean parallel_mean serial_time parallel_time Example
0 2 100 100 2 0.000062 0.00615 0.000026 0.002623 0.144839 7.290737 1
1 4 100 10 2 0.000619 0.00615 0.000264 0.002623 0.016247 4.734909 1
2 4 100 50 2 0.000124 0.00615 0.000053 0.002623 0.068887 5.979487 1
3 8 100 100 2 0.000062 0.00615 0.000026 0.002623 0.226073 3.781236 1
4 16 100 10 2 0.000619 0.00615 0.000264 0.002623 0.082640 5.872348 1

In [3]:
# Read the second results file and tag every row with Example = 2 so the
# rows stay identifiable once the frames are combined.
data2 = pd.read_csv("data2.csv").assign(Example=2)
data2.head()


Out[3]:
n_procs numSteps qualityfactor corrections serial_max parallel_max serial_mean parallel_mean serial_time parallel_time Example
0 2 100 100 2 0.000353 0.037062 0.000163 0.017199 0.273088 15.602692 2
1 4 100 10 2 0.003544 0.037062 0.001641 0.017199 0.034686 10.134147 2
2 4 100 50 2 0.000706 0.037062 0.000327 0.017199 0.131030 9.705749 2
3 8 100 100 2 0.000353 0.037062 0.000163 0.017199 0.323222 6.800989 2
4 16 100 10 2 0.003544 0.037062 0.001641 0.017199 0.095807 13.253634 2

In [4]:
# Stack both example frames row-wise (concat's default axis) and renumber
# the index from 0 so row labels are unique across the combined frame.
data = pd.concat([data1, data2], ignore_index=True)
data


Out[4]:
n_procs numSteps qualityfactor corrections serial_max parallel_max serial_mean parallel_mean serial_time parallel_time Example
0 2 100 100 2 0.000062 0.006150 0.000026 0.002623 0.144839 7.290737 1
1 4 100 10 2 0.000619 0.006150 0.000264 0.002623 0.016247 4.734909 1
2 4 100 50 2 0.000124 0.006150 0.000053 0.002623 0.068887 5.979487 1
3 8 100 100 2 0.000062 0.006150 0.000026 0.002623 0.226073 3.781236 1
4 16 100 10 2 0.000619 0.006150 0.000264 0.002623 0.082640 5.872348 1
5 16 100 50 2 0.000124 0.006150 0.000053 0.002623 0.144870 2.524051 1
6 16 100 100 2 0.000062 0.006150 0.000026 0.002623 0.144561 4.177090 1
7 16 100 100 1 0.000062 0.006243 0.000026 0.002663 0.884363 0.024045 1
8 16 100 100 2 0.000062 0.006150 0.000026 0.002623 0.476075 1.524805 1
9 16 100 100 5 0.000062 0.006243 0.000026 0.002663 0.138123 4.277979 1
10 16 100 100 10 0.000062 0.006150 0.000026 0.002623 0.276352 9.230153 1
11 16 100 100 50 0.000062 0.006150 0.000026 0.002623 0.307629 48.117501 1
12 16 100 100 100 0.000062 0.006150 0.000026 0.002623 0.518091 105.657221 1
13 32 100 10 2 0.000619 0.006150 0.000264 0.002623 0.024898 2.224934 1
14 32 100 50 2 0.000124 0.006150 0.000053 0.002623 0.071008 2.119910 1
15 32 100 100 1 0.000062 0.006243 0.000026 0.002663 0.443254 0.047888 1
16 32 100 100 2 0.000062 0.006150 0.000026 0.002623 0.119375 1.640201 1
17 32 100 100 5 0.000062 0.006243 0.000026 0.002663 0.309229 7.003868 1
18 32 100 100 10 0.000062 0.006150 0.000026 0.002623 0.263915 14.185466 1
19 32 100 100 50 0.000062 0.006150 0.000026 0.002623 0.279576 72.412345 1
20 32 100 100 100 0.000062 0.006150 0.000026 0.002623 0.298442 161.320850 1
21 2 100 100 2 0.000353 0.037062 0.000163 0.017199 0.273088 15.602692 2
22 4 100 10 2 0.003544 0.037062 0.001641 0.017199 0.034686 10.134147 2
23 4 100 50 2 0.000706 0.037062 0.000327 0.017199 0.131030 9.705749 2
24 8 100 100 2 0.000353 0.037062 0.000163 0.017199 0.323222 6.800989 2
25 16 100 10 2 0.003544 0.037062 0.001641 0.017199 0.095807 13.253634 2
26 16 100 50 2 0.000706 0.037062 0.000327 0.017199 0.186552 5.194865 2
27 16 100 100 2 0.000353 0.037062 0.000163 0.017199 0.379339 9.783687 2
28 16 100 100 1 0.000353 0.037062 0.000163 0.017205 0.304592 0.033993 2
29 16 100 100 2 0.000353 0.037062 0.000163 0.017199 0.300513 2.786015 2
30 16 100 100 5 0.000353 0.037062 0.000163 0.017205 0.296380 8.982083 2
31 16 100 100 10 0.000353 0.037062 0.000163 0.017199 0.315678 19.845286 2
32 16 100 100 50 0.000353 0.037062 0.000163 0.017199 0.283889 105.358575 2
33 16 100 100 100 0.000353 0.037062 0.000163 0.017199 0.308455 230.499720 2
34 32 100 10 2 0.003544 0.037062 0.001641 0.017199 0.062029 3.863887 2
35 32 100 50 2 0.000706 0.037062 0.000327 0.017199 0.139086 4.521666 2
36 32 100 100 1 0.000353 0.037062 0.000163 0.017205 0.511949 0.041012 2
37 32 100 100 2 0.000353 0.037062 0.000163 0.017199 0.273211 3.680919 2
38 32 100 100 5 0.000353 0.037062 0.000163 0.017205 0.289979 16.930516 2
39 32 100 100 10 0.000353 0.037062 0.000163 0.017199 0.286186 30.488317 2
40 32 100 100 50 0.000353 0.037062 0.000163 0.017199 0.269222 117.171796 2
41 32 100 100 100 0.000353 0.037062 0.000163 0.017199 0.311367 281.408144 2

In [5]:
def plot_stuff(grouped, x, field1, field2, xlab="", ylab="", title="", fname="plot.pdf"):
    """Plot a "Serial" and a "Parallel" curve per example, save the figure, and show it.

    Parameters
    ----------
    grouped : pandas.DataFrame
        Frame containing an 'Example' column plus the columns named by
        ``x``, ``field1`` and ``field2``.
    x : str
        Column used for the horizontal axis.
    field1 : str
        Column drawn with the "Serial" legend label.
    field2 : str
        Column drawn with the "Parallel" legend label.
    xlab, ylab, title : str, optional
        Axis labels and figure title.
    fname : str, optional
        Path the figure is written to via ``savefig``.

    Returns
    -------
    None
        The figure is saved to ``fname`` and displayed as a side effect.
    """
    # A dedicated figure per call, so nothing is drawn onto whatever figure
    # happened to be current before the call.
    fig, ax = plt.subplots()

    # One serial/parallel pair for every example id present (ascending),
    # rather than assuming exactly the ids 1 and 2.
    for example in sorted(grouped['Example'].unique()):
        subset = grouped[grouped['Example'] == example]
        ax.plot(subset[x], subset[field1], '-o',
                label='Example {} Serial'.format(example))
        ax.plot(subset[x], subset[field2], '-o',
                label='Example {} Parallel'.format(example))

    # The anchor sits to the right of the axes so the legend does not cover
    # the curves; bbox_inches="tight" below keeps it inside the saved file.
    ax.legend(loc='best', bbox_to_anchor=(1.25, 1))
    ax.set_title(title)
    ax.set_ylabel(ylab)
    ax.set_xlabel(xlab)
    fig.savefig(fname, bbox_inches="tight")
    plt.show()
    # Release the figure explicitly in case the active backend keeps it open.
    plt.close(fig)

In [6]:
# Runtime vs. number of CPUs, holding corrections = 2 and qualityfactor = 100.
criterion = (data['corrections'] == 2) & (data['qualityfactor'] == 100)
plt_data = data[criterion]
# Average rows that share the same (n_procs, Example) configuration.
# GroupBy.mean() is used instead of aggregate(np.mean): passing the NumPy
# callable is deprecated in recent pandas and emits a FutureWarning.
grouped = plt_data.groupby(['n_procs', 'Example'], as_index=False).mean()
plot_stuff(grouped, 'n_procs', 'serial_time', 'parallel_time', 'Number of CPUs',
           'Wall Clock Time (sec)', 'Runtime vs. CPUs', 'runtime_vs_cpus.pdf')



In [7]:
# Runtime vs. number of correction steps, holding n_procs = 16 and
# qualityfactor = 100.
plt_df = data[(data['n_procs'] == 16) & (data['qualityfactor'] == 100)]
# Average rows that share the same (corrections, Example) configuration.
# GroupBy.mean() is used instead of aggregate(np.mean): passing the NumPy
# callable is deprecated in recent pandas and emits a FutureWarning.
grouped = plt_df.groupby(['corrections', 'Example'], as_index=False).mean()
plot_stuff(grouped, 'corrections', 'serial_time', 'parallel_time', 'Number of Corrections',
           'Wall Clock Time (sec)', 'Runtime vs. Number of Correction Steps', 'runtime_vs_corrections.pdf')



In [8]:
# Error (serial_mean / parallel_mean columns) vs. number of correction steps,
# holding n_procs = 32 and qualityfactor = 100.
plt_df = data[(data['n_procs'] == 32) & (data['qualityfactor'] == 100)]
# Average rows that share the same (corrections, Example) configuration.
# GroupBy.mean() is used instead of aggregate(np.mean): passing the NumPy
# callable is deprecated in recent pandas and emits a FutureWarning.
grouped = plt_df.groupby(['corrections', 'Example'], as_index=False).mean()
plot_stuff(grouped, 'corrections', 'serial_mean', 'parallel_mean', 'Number of Corrections',
           'Absolute Error', 'Error vs. Number of Correction Steps', 'error_vs_corrections.pdf')



In [9]:
# Error (serial_mean / parallel_mean columns) vs. quality factor,
# holding n_procs = 32 and corrections = 2.
plt_df = data[(data['n_procs'] == 32) & (data['corrections'] == 2)]
# Average rows that share the same (qualityfactor, Example) configuration.
# GroupBy.mean() is used instead of aggregate(np.mean): passing the NumPy
# callable is deprecated in recent pandas and emits a FutureWarning.
grouped = plt_df.groupby(['qualityfactor', 'Example'], as_index=False).mean()
plot_stuff(grouped, 'qualityfactor', 'serial_mean', 'parallel_mean', 'Quality Factor',
           'Absolute Error', 'Error vs. Quality Factor', 'error_vs_qualityfactor.pdf')