In [239]:
import csv
import pandas as pd
import numpy as np
from IPython.display import Markdown

In [110]:
%%HTML
<!-- Draws a 1px solid dark-grey border around every <td class="data"> cell.
     Presumably this targets the cells of the styled pandas tables rendered
     further down; verify against the rendered HTML. -->
<style type="text/css">
    td.data {
        border: 1px solid #666;
    }
</style>


Topology

Hemera K80


In [59]:
# Read the GPU count from the second line of the P2P results file.
# That line is split on ':' and the text after the first ':' is parsed as an
# integer. The context manager guarantees the file is closed even if the
# parse raises.
with open("results-p2p.txt", "r") as f:
    f.readline()  # skip first row
    ngpus = int(f.readline().split(":")[1])
"Number of GPUs: %d" % ngpus


Out[59]:
'Number of GPUs: 8'

In [325]:
# Print the two raw report files verbatim, in this order: the CPU/NUMA node
# report, then the GPU clock and PCI report. Each file is opened in a context
# manager so the handle is released even if reading or printing fails.
for report_path in ("results-cpus.txt", "results-clocks-pci.txt"):
    with open(report_path, "r") as f:
        print(f.read())


available: 2 nodes (0-1)
node 0 cpus: 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23
node 0 size: 130977 MB
node 0 free: 113609 MB
node 1 cpus: 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31
node 1 size: 131072 MB
node 1 free: 110252 MB
node distances:
node   0   1 
  0:  10  21 
  1:  21  10 

--- ALL NVML/nvidia-smi Devices -----
0: Dev Name, Tesla K80, PCI Bus (domain:bus:dev), 00000000:04:00.0, PCI Bus, 4, PCI Domain, 0, PCI Dev, 0, PCI DevID, 271388894, PCI SubsysID, 275517662
1: Dev Name, Tesla K80, PCI Bus (domain:bus:dev), 00000000:05:00.0, PCI Bus, 5, PCI Domain, 0, PCI Dev, 0, PCI DevID, 271388894, PCI SubsysID, 275517662
2: Dev Name, Tesla K80, PCI Bus (domain:bus:dev), 00000000:08:00.0, PCI Bus, 8, PCI Domain, 0, PCI Dev, 0, PCI DevID, 271388894, PCI SubsysID, 275517662
3: Dev Name, Tesla K80, PCI Bus (domain:bus:dev), 00000000:09:00.0, PCI Bus, 9, PCI Domain, 0, PCI Dev, 0, PCI DevID, 271388894, PCI SubsysID, 275517662
4: Dev Name, Tesla K80, PCI Bus (domain:bus:dev), 00000000:84:00.0, PCI Bus, 132, PCI Domain, 0, PCI Dev, 0, PCI DevID, 271388894, PCI SubsysID, 275517662
5: Dev Name, Tesla K80, PCI Bus (domain:bus:dev), 00000000:85:00.0, PCI Bus, 133, PCI Domain, 0, PCI Dev, 0, PCI DevID, 271388894, PCI SubsysID, 275517662
6: Dev Name, Tesla K80, PCI Bus (domain:bus:dev), 00000000:88:00.0, PCI Bus, 136, PCI Domain, 0, PCI Dev, 0, PCI DevID, 271388894, PCI SubsysID, 275517662
7: Dev Name, Tesla K80, PCI Bus (domain:bus:dev), 00000000:89:00.0, PCI Bus, 137, PCI Domain, 0, PCI Dev, 0, PCI DevID, 271388894, PCI SubsysID, 275517662
--- Visible CUDA Devices -----
0: Tesla K80
 GraphicsClock CUDA & NVML (target clock):   823 & 875 MHz( @ 875 MHz)
 MemClock CUDA & NVML (target clock):        2505 & 2505 MHz( @ 2505 MHz)
 PCI-BUS ID:                                 00000000:04:00.0
 nvidia-smi ID (for flag '-i'):              0
1: Tesla K80
 GraphicsClock CUDA & NVML (target clock):   823 & 875 MHz( @ 875 MHz)
 MemClock CUDA & NVML (target clock):        2505 & 2505 MHz( @ 2505 MHz)
 PCI-BUS ID:                                 00000000:05:00.0
 nvidia-smi ID (for flag '-i'):              1
2: Tesla K80
 GraphicsClock CUDA & NVML (target clock):   823 & 875 MHz( @ 875 MHz)
 MemClock CUDA & NVML (target clock):        2505 & 2505 MHz( @ 2505 MHz)
 PCI-BUS ID:                                 00000000:08:00.0
 nvidia-smi ID (for flag '-i'):              2
3: Tesla K80
 GraphicsClock CUDA & NVML (target clock):   823 & 875 MHz( @ 875 MHz)
 MemClock CUDA & NVML (target clock):        2505 & 2505 MHz( @ 2505 MHz)
 PCI-BUS ID:                                 00000000:09:00.0
 nvidia-smi ID (for flag '-i'):              3
4: Tesla K80
 GraphicsClock CUDA & NVML (target clock):   823 & 875 MHz( @ 875 MHz)
 MemClock CUDA & NVML (target clock):        2505 & 2505 MHz( @ 2505 MHz)
 PCI-BUS ID:                                 00000000:84:00.0
 nvidia-smi ID (for flag '-i'):              4
5: Tesla K80
 GraphicsClock CUDA & NVML (target clock):   823 & 875 MHz( @ 875 MHz)
 MemClock CUDA & NVML (target clock):        2505 & 2505 MHz( @ 2505 MHz)
 PCI-BUS ID:                                 00000000:85:00.0
 nvidia-smi ID (for flag '-i'):              5
6: Tesla K80
 GraphicsClock CUDA & NVML (target clock):   823 & 875 MHz( @ 875 MHz)
 MemClock CUDA & NVML (target clock):        2505 & 2505 MHz( @ 2505 MHz)
 PCI-BUS ID:                                 00000000:88:00.0
 nvidia-smi ID (for flag '-i'):              6
7: Tesla K80
 GraphicsClock CUDA & NVML (target clock):   823 & 875 MHz( @ 875 MHz)
 MemClock CUDA & NVML (target clock):        2505 & 2505 MHz( @ 2505 MHz)
 PCI-BUS ID:                                 00000000:89:00.0
 nvidia-smi ID (for flag '-i'):              7


In [150]:
# Background colour per link type found in the topology matrix.
_TOPO_BACKGROUNDS = {
    "PIX": "background: #77cc66",
    "PHB": "background: #aaee88",
    "SYS": "background: #eeaaaa",
}


def _topo_cell_css(cell):
    """Return the CSS for one topology cell; '' when the value has no colour assigned."""
    return _TOPO_BACKGROUNDS.get(cell, '')


# Only the first `ngpus` data rows of the topology dump are read.
fc = pd.read_table("results-topo.txt", nrows=ngpus)
fc.style.hide_index().applymap(_topo_cell_css)


Out[150]:
Unnamed: 0 GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 GPU6 GPU7 mlx4_0 mlx4_1 CPU Affinity
GPU0 X PIX PHB PHB SYS SYS SYS SYS PHB SYS 0-0,16-16
GPU1 PIX X PHB PHB SYS SYS SYS SYS PHB SYS 0-0,16-16
GPU2 PHB PHB X PIX SYS SYS SYS SYS PHB SYS 0-0,16-16
GPU3 PHB PHB PIX X SYS SYS SYS SYS PHB SYS 0-0,16-16
GPU4 SYS SYS SYS SYS X PIX PHB PHB SYS PHB 0-0,16-16
GPU5 SYS SYS SYS SYS PIX X PHB PHB SYS PHB 0-0,16-16
GPU6 SYS SYS SYS SYS PHB PHB X PIX SYS PHB 0-0,16-16
GPU7 SYS SYS SYS SYS PHB PHB PIX X SYS PHB 0-0,16-16

P2P Matrix


In [303]:
def from_sdkp2p(tableindex, twidth=0, hide="D\\D", path="results-sdk-p2p.txt"):
    r"""Load one table from the SDK P2P results file and return it as a Styler.

    The file is read as a sequence of tables spaced ``ngpus + 2`` lines apart:
    ``tableindex * (ngpus + 2)`` lines are skipped, the next line is used as
    the header, and ``ngpus`` data rows are read. ``ngpus`` is the
    notebook-level GPU count and must be defined before this is called.

    Parameters
    ----------
    tableindex : int
        Zero-based position of the table within the file.
    twidth : int, optional
        ``0`` (default) parses the table as space-separated text and then
        drops every column that contains a NaN. Any other value parses the
        table as fixed-width text with ``ngpus + 1`` columns of ``twidth``
        characters each.
    hide : str, optional
        Label of the column to hide in the rendered table. Defaults to the
        three-character text ``D\D``. Presumably this is the header of the
        leading row-label column; confirm against the results file.
    path : str, optional
        Results file to read. Defaults to ``"results-sdk-p2p.txt"``.

    Returns
    -------
    pandas.io.formats.style.Styler
        Styler over the loaded frame with the index and the ``hide`` column
        hidden, ready for further chained styling.
    """
    # The default for `hide` is spelled "D\\D": the previous "D\D" relied on an
    # invalid escape sequence, which Python warns about. The value is unchanged.
    first_line = tableindex * (ngpus + 2)
    if twidth == 0:
        df = pd.read_table(path,
                           skiprows=first_line,
                           nrows=ngpus,
                           skipinitialspace=True,
                           sep=" ").dropna(axis=1)
    else:
        df = pd.read_fwf(path,
                         skiprows=first_line,
                         nrows=ngpus,
                         skipinitialspace=True,
                         widths=tuple(np.repeat(twidth, ngpus + 1)))
    return df.style.hide_index().hide_columns([hide])

In [293]:
# Table index 1 of the SDK results file, parsed as space-separated text;
# highlight_max(axis=0) marks the largest value in each column.
from_sdkp2p(1).highlight_max(axis=0)


Out[293]:
0 1 2 3 4 5 6 7
1 1 1 1 0 0 0 0
1 1 1 1 0 0 0 0
1 1 1 1 0 0 0 0
1 1 1 1 0 0 0 0
0 0 0 0 1 1 1 1
0 0 0 0 1 1 1 1
0 0 0 0 1 1 1 1
0 0 0 0 1 1 1 1

Bandwidth Matrix (in GB/s)


In [294]:
def _bandwidth_cell_css(x):
    """Return the background CSS for one bandwidth cell.

    Values above 100 get a grey background, values below 3 get a red one,
    and everything else is left unstyled.
    """
    return "background: #bbb" if x>100 else "background: #f00" if x<3 else ""


def _bandwidth_table(tableindex):
    """Load a bandwidth table (fixed-width, 7-character columns) with the shared styling.

    Every table gets the same in-cell bars scaled from 0 to 16 and the same
    per-cell background rule, so all five matrices are directly comparable.
    """
    return (from_sdkp2p(tableindex, twidth=7)
            .bar(vmin=0, vmax=16, color="#ddd")
            .applymap(_bandwidth_cell_css))


# One styled table per results-file table index 2..6.
df_dis_p2p = _bandwidth_table(2)
df_ena_p2p_w = _bandwidth_table(3)
df_ena_p2p_r = _bandwidth_table(4)
df_dis_p2p_bi = _bandwidth_table(5)
df_ena_p2p_bi = _bandwidth_table(6)

Unidirectional - P2P Disabled


In [295]:
# Render the styled bandwidth table loaded from table index 2.
df_dis_p2p


Out[295]:
0 1 2 3 4 5 6 7
172.76 10 10.22 10.32 5.83 5.84 6.37 6.76
9.89 173.05 10.25 10.33 5.84 5.87 5.82 6.03
10.3 10.31 172.88 10 5.85 6.47 6.57 6.75
10.25 10.27 10.01 172.69 6.31 7.44 7.04 6.79
6.6 6.73 6.79 6.72 172.95 5.19 5.6 6.19
6.59 6.69 6.81 6.72 5.58 172.47 5.67 6.13
6.59 6.93 6.8 6.75 6.02 5.59 172.21 5.6
6.61 6.71 6.76 6.77 5.99 5.63 5.26 170.99

Unidirectional - Writes, P2P Enabled


In [296]:
# Render the styled bandwidth table loaded from table index 3.
df_ena_p2p_w


Out[296]:
0 1 2 3 4 5 6 7
172.58 0.75 10.25 10.25 5.91 5.96 5.96 6.47
0.75 172.75 10.25 10.25 5.9 6.07 6.7 6.67
10.25 10.25 173.35 0.75 6.23 6.67 6.69 6.72
10.25 10.25 0.75 172.96 5.93 5.89 5.89 5.9
6.55 6.77 6.8 6.73 173.04 0.75 10.25 10.25
6.57 6.7 6.78 6.69 0.75 172.83 10.25 10.25
6.47 6.73 6.78 6.7 10.25 10.24 169.98 0.75
6.49 6.71 6.8 6.73 10.25 10.25 0.75 171.85

Unidirectional - Reads, P2P Enabled


In [297]:
# Render the styled bandwidth table loaded from table index 4.
df_ena_p2p_r


Out[297]:
0 1 2 3 4 5 6 7
172.37 0.75 10.2 10.19 6.53 6.58 6.6 6.6
0.75 173.13 10.19 10.2 6.67 6.91 6.8 6.74
10.19 10.19 173.32 0.75 6.74 6.87 6.86 6.79
10.19 10.19 0.75 173.05 6.69 6.79 6.75 6.73
6.63 6.66 6.64 6.59 172.82 0.75 10.19 10.19
5.82 6.18 6.64 6.6 0.75 171.65 10.19 10.19
6.43 6.57 6.59 6.62 10.2 10.2 172.58 0.75
5.83 5.85 5.84 5.84 10.2 10.19 0.75 172.17

Bidirectional - P2P Disabled


In [298]:
# Render the styled bandwidth table loaded from table index 5.
df_dis_p2p_bi


Out[298]:
0 1 2 3 4 5 6 7
173.38 8.52 18.62 18.43 7.21 7.28 7.33 7.25
8.42 173.84 18.63 18.4 7.43 7.43 7.42 7.43
18.66 18.55 173.79 8.58 7.69 8.45 7.8 7.63
18.46 18.48 8.52 173.86 7.32 7.31 7.44 7.36
7.21 7.42 7.62 7.31 173.4 3.26 3.81 3.81
7.21 7.41 7.57 7.34 3.39 173.79 3.8 3.8
7.29 7.4 7.59 7.32 3.86 3.81 173.9 3.44
7.23 7.43 7.59 7.38 3.84 3.75 3.35 173.44

Bidirectional - P2P Enabled


In [299]:
# Render the styled bandwidth table loaded from table index 6.
df_ena_p2p_bi


Out[299]:
0 1 2 3 4 5 6 7
173.83 1.4 19.64 19.64 7.22 7.28 7.26 7.25
1.4 173.95 19.66 19.66 7.39 7.41 7.51 7.44
19.68 19.68 173.97 1.4 7.56 7.61 7.74 7.65
19.69 19.69 1.4 173.64 7.35 7.34 7.35 7.32
7.25 7.4 7.57 7.35 173.92 1.4 19.69 19.7
7.24 7.38 7.56 7.27 1.4 174.03 19.67 19.66
7.18 7.44 7.57 7.33 19.7 19.7 173.38 1.4
7.23 7.44 7.61 7.34 19.71 19.71 1.4 173.75

Latency Matrix (in µs)


In [315]:
def _latency_cell_css(x):
    """Return a red background for latency values above 1000, otherwise no styling."""
    return "background: #f00" if x>1000 else ""


def _latency_table(tableindex, hide):
    """Load a latency table (space-separated) with the shared styling.

    `hide` is the label of the column to hide, passed through to from_sdkp2p.
    Every table gets in-cell bars scaled from 0 to 30 and the same outlier
    background rule.
    """
    return (from_sdkp2p(tableindex, hide=hide)
            .bar(vmin=0, vmax=30, color="#ddd")
            .applymap(_latency_cell_css))


df_lat_gpu = _latency_table(7, "GPU")
df_lat_cpu = _latency_table(8, "CPU")
df_lat_gpu_p2p_w = _latency_table(9, "GPU")
# NOTE(review): table index 10 is not loaded. Presumably it is the CPU-side
# table for the P2P-writes run; confirm against results-sdk-p2p.txt.
df_lat_gpu_p2p_r = _latency_table(11, "GPU")

GPU to GPU - P2P Disabled


In [312]:
# Render the styled latency table loaded from table index 7.
df_lat_gpu


Out[312]:
0 1 2 3 4 5 6 7
5.13 19.85 20.05 19.01 21.33 21.49 21.38 21.23
19.47 5.1 16.36 16.68 21.88 23.02 22.86 22.26
20.94 19.7 4.96 18.69 21.54 23.17 22.99 23.05
20.44 19.52 19.64 5.01 19.9 21.88 23.23 23.09
22.66 21.5 22.05 20.85 5.17 22.81 23.31 23.11
23.11 21.63 20.91 22.19 23.28 5.33 19.78 23.44
22.65 22.89 22.69 22.52 23.34 23.13 5.32 20.2
21.3 21.22 21.65 21.54 21.15 20.99 21.28 5.17

CPU to GPU - P2P Disabled


In [313]:
# Render the styled latency table loaded from table index 8.
df_lat_cpu


Out[313]:
0 1 2 3 4 5 6 7
3.79 9.12 9.06 9.06 9.03 9.1 8.96 9.05
9.17 3.72 8.95 8.98 9.22 9.06 9.03 9.05
9.13 9.02 3.75 8.96 9.11 9.05 9.1 9
9.1 9.04 8.87 4.14 9.18 9.17 9.09 9.06
10.32 10.43 15.49 10.51 4.85 10.47 10.49 9.62
10.45 10.21 10.24 10.25 10.59 4.33 10.43 10.49
10.4 10.33 10.47 10.27 10.45 10.57 4.35 10.47
10.37 10.28 10.15 10.21 10.32 10.37 10.5 4.42

GPU to GPU - P2P Enabled, Writes


In [316]:
# Render the styled latency table loaded from table index 9.
df_lat_gpu_p2p_w


Out[316]:
0 1 2 3 4 5 6 7
5.11 49368.8 1.88 1.88 23.37 23.05 22.81 23.34
49369.1 4.97 1.87 1.87 21.09 21.25 21.09 21.05
1.86 1.85 4.96 49369.1 19.12 19.99 21.25 21.01
1.92 1.86 49368.5 5.13 20.72 20.75 21.1 21.08
21.57 21.13 17.76 19.56 5.22 49369.3 2.14 2.08
21 21.74 21.01 20.76 49369.1 5.16 2.1 2.12
18.96 21.1 20.06 21.29 1.98 2.08 5.19 49369
18.86 22.22 21.83 17.9 2.09 2.01 49369.1 5.21

GPU to GPU - P2P Enabled, Reads


In [317]:
# Render the styled latency table loaded from table index 11.
df_lat_gpu_p2p_r


Out[317]:
0 1 2 3 4 5 6 7
5.12 49377.7 14.15 13.69 19.02 20.15 19.51 18.87
49381.6 5.11 14.11 14.14 19.48 19.35 19.83 19.16
14.34 14.01 4.99 49374.8 19.68 19.46 19.71 19.27
13.13 14.12 49378.6 4.98 19.87 19.83 18.88 19.21
21.91 22.14 19.88 21.37 5.25 49380.7 13.61 13.26
21.34 21.68 22.34 20.61 49376.4 5.25 14.21 14.22
20.62 19.46 20.58 20.99 14.26 14.19 5.19 49381.9
21.98 21.72 21.03 18.72 14.26 14.23 49381.9 5.2

In [ ]: