cke



In [1]:
import warnings
import pandas as pd
import numpy as np
import os
import sys # error msg
import operator # sorting
from math import *

from read_trace import *
from avgblkmodel import *

warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

GPU info (GeForce GTX 950)


In [2]:
# Resource description of the target GPU, stored on a DeviceInfo object.
# The trace output in this notebook reports the device as "GeForce GTX 950".
gtx950 = DeviceInfo()
# Number of SMs on the device.
gtx950.sm_num = 6
# Shared memory per SM; presumably in bytes (49152 = 48 * 1024) — TODO confirm the unit.
gtx950.sharedmem_per_sm = 49152
# Registers available per SM.
gtx950.reg_per_sm = 65536
# Maximum number of threads per SM.
gtx950.maxthreads_per_sm = 2048

Single-stream trace info


In [3]:
# Pick the trace to analyze; the trace file name encodes data_size.
# NOTE(review): data_size is presumably the number of float elements per vector
# (23000 * 4 B = 89.84 KB, which matches the Size column of the trace shown
# in the next cell's output) — confirm.
data_size = 23000
trace_file = './1cke/trace_' + str(data_size) + '.csv'
df_trace = trace2dataframe(trace_file) # read the trace to the dataframe

In [4]:
# Inspect the raw trace: in the output shown, row 0 carries the units of each
# column and the following rows are the memcpy / kernel records.
df_trace


Out[4]:
Start Duration Grid X Grid Y Grid Z Block X Block Y Block Z Registers Per Thread Static SMem Dynamic SMem Size Throughput Device Context Stream Name
0 ms us NaN NaN NaN NaN NaN NaN NaN B B KB GB/s NaN NaN NaN NaN
1 526.961828 16.672000 NaN NaN NaN NaN NaN NaN NaN NaN NaN 89.843750 5.139256 GeForce GTX 950 (0) 1.0 13.0 [CUDA memcpy HtoD]
2 526.979716 16.224000 NaN NaN NaN NaN NaN NaN NaN NaN NaN 89.843750 5.281168 GeForce GTX 950 (0) 1.0 13.0 [CUDA memcpy HtoD]
3 527.157829 61.056000 90.0 1.0 1.0 256.0 1.0 1.0 28.0 0 0 NaN NaN GeForce GTX 950 (0) 1.0 13.0 kernel_vectorAdd(float const *, float const *,...
4 527.221349 15.904000 NaN NaN NaN NaN NaN NaN NaN NaN NaN 89.843750 5.387429 GeForce GTX 950 (0) 1.0 13.0 [CUDA memcpy DtoH]

In [5]:
# Turn the raw trace into a sequential timing table. Judging from the output,
# each row is either an API call (h2d / kern / d2h) or the gap between two
# consecutive calls ("*_ovhd"), with start, end and duration columns.
# NOTE(review): durations appear to be converted to ms (16.672 us in the raw
# trace shows up as 0.016672 here) — confirm in model_param_from_trace.
df_single_stream = model_param_from_trace(df_trace)
df_single_stream.head(20)


Out[5]:
seq api_type start end duration
0 0.0 h2d 526.961828 526.978500 0.016672
1 1.0 h2d_h2d_ovhd 526.978500 526.979716 0.001216
2 2.0 h2d 526.979716 526.995940 0.016224
3 3.0 h2d_kern_ovhd 526.995940 527.157829 0.161889
4 4.0 kern 527.157829 527.218885 0.061056
5 5.0 kern_d2h_ovhd 527.218885 527.221349 0.002464
6 6.0 d2h 527.221349 527.237253 0.015904

Model the 2-stream case

We need a trace table to track the timing of each kernel.


In [6]:
# Work on a deep copy so that df_single_stream keeps the original timestamps
# while the copy is adjusted in the following cells.
df_single_stream_update = df_single_stream.copy(deep=True)
# NOTE: adding a per-stream label column (df_cke['stream'] = 0) was tried here
# and is currently disabled.

In [7]:
# Sanity check: before any adjustment the copy shows the same rows as
# df_single_stream.
df_single_stream_update


Out[7]:
seq api_type start end duration
0 0.0 h2d 526.961828 526.978500 0.016672
1 1.0 h2d_h2d_ovhd 526.978500 526.979716 0.001216
2 2.0 h2d 526.979716 526.995940 0.016224
3 3.0 h2d_kern_ovhd 526.995940 527.157829 0.161889
4 4.0 kern 527.157829 527.218885 0.061056
5 5.0 kern_d2h_ovhd 527.218885 527.221349 0.002464
6 6.0 d2h 527.221349 527.237253 0.015904

In [8]:
# deduct the starting timing
offset = df_single_stream_update.start[0]
#print offset
df_single_stream_update.start = df_single_stream_update.start - offset
df_single_stream_update.end = df_single_stream_update.end - offset
print df_single_stream_update


   seq       api_type     start       end  duration
0  0.0            h2d  0.000000  0.016672  0.016672
1  1.0   h2d_h2d_ovhd  0.016672  0.017888  0.001216
2  2.0            h2d  0.017888  0.034112  0.016224
3  3.0  h2d_kern_ovhd  0.034112  0.196001  0.161889
4  4.0           kern  0.196001  0.257057  0.061056
5  5.0  kern_d2h_ovhd  0.257057  0.259521  0.002464
6  6.0            d2h  0.259521  0.275425  0.015904

In [9]:
## make duplication for the same trace
stream_num = 2

df_cke_list = []
for x in range(stream_num):
    df_cke_list.append(df_single_stream_update.copy(deep=True))

In [10]:
# 1st stream trace (copy of the normalized single-stream trace)
df_cke_list[0]


Out[10]:
seq api_type start end duration
0 0.0 h2d 0.000000 0.016672 0.016672
1 1.0 h2d_h2d_ovhd 0.016672 0.017888 0.001216
2 2.0 h2d 0.017888 0.034112 0.016224
3 3.0 h2d_kern_ovhd 0.034112 0.196001 0.161889
4 4.0 kern 0.196001 0.257057 0.061056
5 5.0 kern_d2h_ovhd 0.257057 0.259521 0.002464
6 6.0 d2h 0.259521 0.275425 0.015904

In [11]:
# 2nd stream trace; at this point it is still identical to the 1st stream
# because no start offset has been applied yet.
df_cke_list[1]


Out[11]:
seq api_type start end duration
0 0.0 h2d 0.000000 0.016672 0.016672
1 1.0 h2d_h2d_ovhd 0.016672 0.017888 0.001216
2 2.0 h2d 0.017888 0.034112 0.016224
3 3.0 h2d_kern_ovhd 0.034112 0.196001 0.161889
4 4.0 kern 0.196001 0.257057 0.061056
5 5.0 kern_d2h_ovhd 0.257057 0.259521 0.002464
6 6.0 d2h 0.259521 0.275425 0.015904

In [17]:
# Timing of the previous (first) stream; the start of the next stream is
# derived from its initial host-to-device (h2d) transfer.
df_prev_stream = df_cke_list[0]

# Threshold on the duration of the initial h2d transfer.
# NOTE(review): the origin and unit of this value are not shown in this
# notebook; presumably the same time unit as the trace columns — confirm.
H2D_H2D_OVLP_TH = 3.158431

# We need to know the data transfer time before the 1st kernel call: remember
# the first 'h2d' row and keep tracking the last one until the kernel (or the
# h2d->kernel overhead row in front of it) is reached. Using the actual row
# labels avoids assuming that the first row is an h2d transfer or that the
# index is a contiguous 0..n-1 range.
h2d_first_ind = None
h2d_last_ind = None
for index, row in df_prev_stream.iterrows():
    api_type = row['api_type']
    if api_type in ('h2d_kern_ovhd', 'kern'):
        break
    if api_type == 'h2d':
        if h2d_first_ind is None:
            h2d_first_ind = index
        h2d_last_ind = index

if h2d_first_ind is None:
    raise ValueError("no 'h2d' transfer found before the first kernel call")

h2d_start = df_prev_stream.loc[h2d_first_ind]['start']
h2d_finish = df_prev_stream.loc[h2d_last_ind]['end']
h2d_duration = h2d_finish - h2d_start
print("h2d : {} - {} = {}".format(h2d_start, h2d_finish, h2d_duration))

# Decide when the second stream may start its data transfer: if the previous
# transfer is longer than the threshold, start at the threshold; otherwise
# wait until the previous transfer has finished.
# NOTE(review): the threshold is used as an absolute time, which equals
# h2d_start + threshold only because the trace was normalized to start at 0.
if h2d_duration > H2D_H2D_OVLP_TH:
    current_stream_start = H2D_H2D_OVLP_TH
else:
    current_stream_start = h2d_finish

print(current_stream_start)


h2d : 0.0 - 0.0341120000001 = 0.0341120000001
0.0341120000001

In [19]:
# add the start_offset to 2nd stream trace
df_cke_list[1].start = df_cke_list[1].start + current_stream_start
df_cke_list[1].end = df_cke_list[1].end + current_stream_start

In [20]:
# 2nd stream trace after shifting start/end by current_stream_start
# (durations are unchanged).
df_cke_list[1]


Out[20]:
seq api_type start end duration
0 0.0 h2d 0.034112 0.050784 0.016672
1 1.0 h2d_h2d_ovhd 0.050784 0.052000 0.001216
2 2.0 h2d 0.052000 0.068224 0.016224
3 3.0 h2d_kern_ovhd 0.068224 0.230113 0.161889
4 4.0 kern 0.230113 0.291169 0.061056
5 5.0 kern_d2h_ovhd 0.291169 0.293633 0.002464
6 6.0 d2h 0.293633 0.309537 0.015904

Figure out whether there is H2D (host-to-device transfer) overlap


In [21]:
def find_h2d_timing(df_trace):
    """
    Find the start and end of the initial h2d transfers of one stream.

    The initial transfers are the 'h2d' rows that appear before the first
    kernel call, i.e. before the first 'h2d_kern_ovhd' or 'kern' row.

    Parameters
    ----------
    df_trace : pandas.DataFrame
        Trace table with at least the columns 'api_type', 'start' and 'end'.
        Rows are scanned in their stored order; any index labels are accepted.

    Returns
    -------
    tuple
        (h2d_start, h2d_finish): 'start' of the first initial h2d row and
        'end' of the last initial h2d row.

    Raises
    ------
    ValueError
        If there is no 'h2d' row before the first kernel call.
    """
    h2d_first_ind = None
    h2d_last_ind = None
    for index, row in df_trace.iterrows():
        api_type = row['api_type']
        # the first kernel (or the overhead row in front of it) ends the
        # initial data-transfer phase
        if api_type in ('h2d_kern_ovhd', 'kern'):
            break
        if api_type == 'h2d':
            # use the real row label instead of assuming the first row is h2d
            if h2d_first_ind is None:
                h2d_first_ind = index
            h2d_last_ind = index

    if h2d_first_ind is None:
        raise ValueError("no 'h2d' transfer found before the first kernel call")

    h2d_start = df_trace.loc[h2d_first_ind]['start']
    h2d_finish = df_trace.loc[h2d_last_ind]['end']

    return h2d_start, h2d_finish

In [24]:
# Initial h2d transfer window (start, end) of the previous stream (stream 0).
prev_h2d_start, prev_h2d_end = find_h2d_timing(df_cke_list[0])
print("prev : {} - {}".format(prev_h2d_start, prev_h2d_end))

# Same window for the current stream (stream 1), read from its current
# start/end columns.
current_h2d_start, current_h2d_end = find_h2d_timing(df_cke_list[1])
print("current : {} - {}".format(current_h2d_start, current_h2d_end))


prev : 0.0 - 0.0341120000001
current : 0.0341120000001 - 0.0682240000001

In [25]:
# h2d_ovlp is 1 when the current stream's h2d transfer starts inside the
# previous stream's transfer window [prev_h2d_start, prev_h2d_end), else 0.
h2d_ovlp = 1 if prev_h2d_start <= current_h2d_start < prev_h2d_end else 0
print("h2d ovlap : {}".format(h2d_ovlp))


h2d ovlap : 0

Figure out whether there is kernel overlap


In [26]:
def find_kernel_timing(df_trace):
    """
    Collect the start and end time of every kernel in the trace of one stream.

    Parameters
    ----------
    df_trace : pandas.DataFrame
        Trace table with at least the columns 'api_type', 'start' and 'end'.

    Returns
    -------
    list
        One [start, end] pair per row whose 'api_type' is 'kern', in row
        order; an empty list if the trace contains no kernel rows.
    """
    # keep only the kernel rows, then pair up their start/end columns
    df_kern = df_trace.loc[df_trace['api_type'] == 'kern']
    return [[start, end] for start, end in zip(df_kern['start'], df_kern['end'])]

In [33]:
# Kernel [start, end] pairs of the previous (stream 0) and current (stream 1)
# traces. Parenthesized print works on both Python 2 and Python 3.
prev_kernel_timing = find_kernel_timing(df_cke_list[0])
print(prev_kernel_timing)
curr_kernel_timing = find_kernel_timing(df_cke_list[1])
print(curr_kernel_timing)

# Only the first kernel of each stream is compared here.
curr_start = curr_kernel_timing[0][0]
prev_start = prev_kernel_timing[0][0]
prev_end = prev_kernel_timing[0][1]

# The kernels overlap when the current kernel starts while the previous one is
# still running, i.e. inside [prev_start, prev_end).
if prev_start <= curr_start < prev_end:
    print("Overlapping betweent the kernel")


[[0.19600100000002385, 0.2570570000000316]]
[[0.23011300000007395, 0.2911690000000817]]
Overlapping betweent the kernel