Quickly combine the timing, system-info, and matrix-properties files


In [24]:
import pandas_profiling
import pandas as pd

In [27]:
# Load the three input tables. In every file the first row supplies the
# column names and the first column becomes the DataFrame index.
props = pd.read_csv(
    '../matrix_properties/processed_properties.csv',
    header=0,
    index_col=0,
)
system_info = pd.read_csv(
    '../systems_info/systems_info.csv',
    header=0,
    index_col=0,
)
timings = pd.read_csv(
    "../processed_timings/np_specific/combined_np12_timings.csv",
    header=0,
    index_col=0,
)

In [29]:
# Summarise the matrix-properties table: column names, dtypes and non-null counts.
props.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 1828 entries, 0 to 1827
Data columns (total 37 columns):
rows                  1828 non-null int64
cols                  1828 non-null int64
min_nnz_row           1828 non-null int64
row_var               1828 non-null float64
col_var               1828 non-null float64
diag_var              1828 non-null float64
nnz                   1828 non-null int64
frob_norm             1828 non-null float64
symm_frob_norm        1828 non-null float64
antisymm_frob_norm    1828 non-null float64
one_norm              1828 non-null float64
inf_norm              1828 non-null float64
symm_inf_norm         1828 non-null float64
antisymm_inf_norm     1828 non-null float64
max_nnz_row           1828 non-null int64
trace                 1828 non-null float64
abs_trace             1828 non-null float64
min_nnz_row.1         1828 non-null int64
avg_nnz_row           1828 non-null int64
dummy_rows            1828 non-null int64
dummy_rows_kind       1828 non-null int64
num_value_symm_1      1828 non-null int64
nnz_pattern_symm_1    1828 non-null int64
num_value_symm_2      1828 non-null float64
nnz_pattern_symm_2    1828 non-null float64
row_diag_dom          1828 non-null int64
col_diag_dom          1828 non-null int64
diag_avg              1828 non-null float64
diag_sign             1828 non-null int64
diag_nnz              1828 non-null int64
lower_bw              1828 non-null int64
upper_bw              1828 non-null int64
row_log_val_spread    1828 non-null float64
col_log_val_spread    1828 non-null float64
symm                  1828 non-null int64
matrix                1828 non-null object
matrix_id             1828 non-null int64
dtypes: float64(17), int64(19), object(1)
memory usage: 542.7+ KB

In [30]:
# Summarise the system-info table as loaded: one row per metric, one column per system.
system_info.info()


<class 'pandas.core.frame.DataFrame'>
Index: 42 entries, system_id to memory_type
Data columns (total 5 columns):
bridges     42 non-null float64
comet       42 non-null float64
stampede    42 non-null float64
summit      42 non-null float64
laptop      42 non-null float64
dtypes: float64(5)
memory usage: 2.0+ KB

In [31]:
# Per-system averages of the numeric timing columns.
# numeric_only=True is spelled out because `timings` also carries string
# columns (system, matrix, solver, prec, status); recent pandas versions raise
# TypeError on those instead of silently dropping them from the mean.
timings.groupby('system_id').mean(numeric_only=True)


Out[31]:
np time iters resid solver_id prec_id status_id good_or_bad new_time matrix_id
system_id
1 12.0 7.694270 6604.875039 inf 3.712206 1.999817 -0.206085 -0.954745 inf -2.163744e+17
2 12.0 6.605045 6593.677270 inf 3.713312 1.999821 -0.258778 -0.937427 inf -1.912320e+17
3 12.0 8.954214 6604.778373 inf 3.713201 1.999715 -0.211476 -0.930880 inf -1.855716e+17
4 12.0 7.634922 6608.627975 inf 3.712082 1.999848 -0.203067 -0.938665 inf -2.121048e+17

In [33]:
# Transpose so that each system is a row and each metric (including
# system_id) is a column, then summarise the reshaped frame.
flipped = system_info.T
flipped.info()


<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, bridges to laptop
Data columns (total 42 columns):
system_id                               5 non-null float64
HPL_Tflops                              5 non-null float64
StarDGEMM_Gflops                        5 non-null float64
SingleDGEMM_Gflops                      5 non-null float64
PTRANS_GBs                              5 non-null float64
MPIRandomAccess_LCG_GUPs                5 non-null float64
MPIRandomAccess_GUPs                    5 non-null float64
StarRandomAccess_LCG_GUPs               5 non-null float64
SingleRandomAccess_LCG_GUPs             5 non-null float64
StarRandomAccess_GUPs                   5 non-null float64
SingleRandomAccess_GUPs                 5 non-null float64
StarSTREAM_Copy                         5 non-null float64
StarSTREAM_Scale                        5 non-null float64
StarSTREAM_Add                          5 non-null float64
StarSTREAM_Triad                        5 non-null float64
SingleSTREAM_Copy                       5 non-null float64
SingleSTREAM_Scale                      5 non-null float64
SingleSTREAM_Add                        5 non-null float64
SingleSTREAM_Triad                      5 non-null float64
StarFFT_Gflops                          5 non-null float64
SingleFFT_Gflops                        5 non-null float64
MPIFFT_Gflops                           5 non-null float64
MaxPingPongLatency_usec                 5 non-null float64
RandomlyOrderedRingLatency_usec         5 non-null float64
MinPingPongBandwidth_GBytes             5 non-null float64
NaturallyOrderedRingBandwidth_GBytes    5 non-null float64
RandomlyOrderedRingBandwidth_GBytes     5 non-null float64
MinPingPongLatency_usec                 5 non-null float64
AvgPingPongLatency_usec                 5 non-null float64
MaxPingPongBandwidth_GBytes             5 non-null float64
AvgPingPongBandwidth_GBytes             5 non-null float64
NaturallyOrderedRingLatency_usec        5 non-null float64
MemProc                                 5 non-null float64
core_count                              5 non-null float64
cpu_freq                                5 non-null float64
bogo_mips                               5 non-null float64
l1_cache                                5 non-null float64
l2_cache                                5 non-null float64
l3_cache                                5 non-null float64
memory_size                             5 non-null float64
memory_freq                             5 non-null float64
memory_type                             5 non-null float64
dtypes: float64(42)
memory usage: 1.7+ KB

In [40]:
# Attach the per-system columns to every timing row (joined on system_id),
# then discard exact duplicate rows and any row containing a missing value.
timings_and_system_info = (
    pd.merge(timings, flipped, on='system_id')
    .drop_duplicates()
    .dropna()
)

# Persist the merged table without writing the index column.
timings_and_system_info.to_csv('./np12_timings_and_system_info.csv', index=False)

In [37]:
# Preview the first rows of the merged timings + system-info table.
timings_and_system_info.head()


Out[37]:
system np matrix solver prec status time iters resid system_id ... MemProc core_count cpu_freq bogo_mips l1_cache l2_cache l3_cache memory_size memory_freq memory_type
0 bridges 12 saylr3.mtx FIXED_POINT ILUT unconverged 0.734205 10000.0 0.260768 1 ... 1024.0 28.0 2300.0 4604.72 32.0 256.0 35840.0 128.0 2133.0 4.0
1 bridges 12 saylr3.mtx FIXED_POINT RILUK error 0.000000 NaN NaN 1 ... 1024.0 28.0 2300.0 4604.72 32.0 256.0 35840.0 128.0 2133.0 4.0
2 bridges 12 saylr3.mtx FIXED_POINT RELAXATION unconverged 0.495889 10000.0 0.063474 1 ... 1024.0 28.0 2300.0 4604.72 32.0 256.0 35840.0 128.0 2133.0 4.0
3 bridges 12 saylr3.mtx FIXED_POINT CHEBYSHEV error 0.000000 NaN NaN 1 ... 1024.0 28.0 2300.0 4604.72 32.0 256.0 35840.0 128.0 2133.0 4.0
4 bridges 12 saylr3.mtx FIXED_POINT NONE error 0.000000 NaN NaN 1 ... 1024.0 28.0 2300.0 4604.72 32.0 256.0 35840.0 128.0 2133.0 4.0

5 rows × 57 columns


In [10]:
# Join the matrix properties onto the timings by matrix_id, then discard
# exact duplicate rows and any row containing a missing value.
combined = (
    pd.merge(props, timings, on='matrix_id')
    .drop_duplicates()
    .dropna()
)

# The original cell built the output path from `loc`, a name that is never
# defined in this notebook, so a fresh-kernel run failed with NameError.
# NOTE(review): the './np12' prefix mirrors the naming of the
# timings/system-info export — confirm the intended output location.
combined.to_csv('./np12_combined_timings.csv')

In [ ]:
# Attach the per-system columns to the properties + timings table.
# The original call, pd.merge() with no arguments, raises TypeError because
# the left and right frames are required.
# NOTE(review): this join mirrors the timings/system-info merge (same key,
# same right-hand frame) — confirm it is the intended combination.
system_combined = pd.merge(combined, flipped, on='system_id')