t2-cke



In [1]:
import warnings
import pandas as pd
import numpy as np
import os
import sys # error msg
import operator # sorting
from math import *

from read_trace import *
from avgblkmodel import *

# NumPy 2.0 removed VisibleDeprecationWarning from the top-level namespace
# (it now lives in np.exceptions). Look it up in both places so this cell
# runs on old and new NumPy releases alike.
visible_deprecation_warning = getattr(np, "VisibleDeprecationWarning", None)
if visible_deprecation_warning is None:
    visible_deprecation_warning = np.exceptions.VisibleDeprecationWarning

# Silence NumPy's visible deprecation warnings for the rest of the notebook.
warnings.filterwarnings("ignore", category=visible_deprecation_warning)

gpu info


In [2]:
# Hardware limits of the target GPU, consumed later by the cke model.
# NOTE(review): units are not stated in this notebook — shared memory is
# presumably in bytes (48 KiB) and registers are a count; confirm against
# DeviceInfo.
gtx950 = DeviceInfo()
gtx950.sm_num = 6                      # number of SMs
gtx950.sharedmem_per_sm = 48 * 1024    # 49152
gtx950.reg_per_sm = 64 * 1024          # 65536
gtx950.maxthreads_per_sm = 2048

single stream info


In [3]:
# Problem size that selects which single-stream trace file to load.
data_size = 23000

# Trace files are named ./1cke/trace_<data_size>.csv
trace_file = './1cke/trace_{}.csv'.format(data_size)

# Parse the trace file into a DataFrame.
df_trace = trace2dataframe(trace_file)

In [4]:
# Display the parsed trace so its rows can be inspected.
df_trace


Out[4]:
Start Duration Grid X Grid Y Grid Z Block X Block Y Block Z Registers Per Thread Static SMem Dynamic SMem Size Throughput Device Context Stream Name
0 ms us NaN NaN NaN NaN NaN NaN NaN B B KB GB/s NaN NaN NaN NaN
1 526.961828 16.672000 NaN NaN NaN NaN NaN NaN NaN NaN NaN 89.843750 5.139256 GeForce GTX 950 (0) 1.0 13.0 [CUDA memcpy HtoD]
2 526.979716 16.224000 NaN NaN NaN NaN NaN NaN NaN NaN NaN 89.843750 5.281168 GeForce GTX 950 (0) 1.0 13.0 [CUDA memcpy HtoD]
3 527.157829 61.056000 90.0 1.0 1.0 256.0 1.0 1.0 28.0 0 0 NaN NaN GeForce GTX 950 (0) 1.0 13.0 kernel_vectorAdd(float const *, float const *,...
4 527.221349 15.904000 NaN NaN NaN NaN NaN NaN NaN NaN NaN 89.843750 5.387429 GeForce GTX 950 (0) 1.0 13.0 [CUDA memcpy DtoH]

In [5]:
# Reduce the raw trace to one row per API call; the output below shows the
# resulting columns (api_type, start, end, duration).
df_single_stream = model_param_from_trace_v1(df_trace)
# Preview at most the first 20 rows.
df_single_stream.head(20)


Out[5]:
api_type start end duration
0 h2d 526.961828 526.978500 0.016672
1 h2d 526.979716 526.995940 0.016224
2 kern 527.157829 527.218885 0.061056
3 d2h 527.221349 527.237253 0.015904

In [6]:
# Re-base the single-stream timeline; the displayed result shows the first
# call starting at 0 with durations unchanged.
df_s1 = reset_starting(df_single_stream)

In [7]:
# Display the re-based single-stream timeline.
df_s1


Out[7]:
api_type start end duration
0 h2d 0.000000 0.016672 0.016672
1 h2d 0.017888 0.034112 0.016224
2 kern 0.196001 0.257057 0.061056
3 d2h 0.259521 0.275425 0.015904

Running the 2-stream cke case


In [8]:
# Number of concurrent streams to model.
stream_num = 2

# Every stream starts from its own deep copy of the single-stream timeline,
# so later per-stream adjustments cannot leak into df_s1 or each other.
df_cke_list = [df_s1.copy(deep=True) for _stream_idx in range(stream_num)]

In [9]:
# Stream 0 timeline before any start-time adjustment (a copy of df_s1).
df_cke_list[0]


Out[9]:
api_type start end duration
0 h2d 0.000000 0.016672 0.016672
1 h2d 0.017888 0.034112 0.016224
2 kern 0.196001 0.257057 0.061056
3 d2h 0.259521 0.275425 0.015904

In [10]:
# Stream 1 timeline before any start-time adjustment (a copy of df_s1).
df_cke_list[1]


Out[10]:
api_type start end duration
0 h2d 0.000000 0.016672 0.016672
1 h2d 0.017888 0.034112 0.016224
2 kern 0.196001 0.257057 0.061056
3 d2h 0.259521 0.275425 0.015904

In [11]:
# Threshold handed to find_whentostart_comingStream.
# NOTE(review): the origin and units of 3.158431 are not explained in this
# notebook — presumably an empirically measured h2d/h2d overlap threshold;
# confirm.
H2D_H2D_OVLP_TH = 3.158431

for i in range(1, stream_num):
    # compute the time for the init data transfer
    stream_startTime = find_whentostart_comingStream(df_cke_list[i-1], H2D_H2D_OVLP_TH)
    print('stream_startTime : {}'.format(stream_startTime))

    # Rebuild stream i from the unshifted single-stream timeline rather than
    # shifting df_cke_list[i] in place. On the first run this is identical to
    # the in-place shift (each entry starts as a copy of df_s1), but re-running
    # the cell no longer stacks the offset a second time.
    df_stream = df_s1.copy(deep=True)
    df_stream['start'] += stream_startTime
    df_stream['end'] += stream_startTime
    df_cke_list[i] = df_stream


stream_startTime : 0.0341120000001

In [12]:
# Stream 0 after the start-time loop; that loop begins at index 1, so this
# timeline is unchanged.
df_cke_list[0]


Out[12]:
api_type start end duration
0 h2d 0.000000 0.016672 0.016672
1 h2d 0.017888 0.034112 0.016224
2 kern 0.196001 0.257057 0.061056
3 d2h 0.259521 0.275425 0.015904

In [13]:
# Stream 1 after its start and end columns were offset by stream_startTime.
df_cke_list[1]


Out[13]:
api_type start end duration
0 h2d 0.034112 0.050784 0.016672
1 h2d 0.052000 0.068224 0.016224
2 kern 0.230113 0.291169 0.061056
3 d2h 0.293633 0.309537 0.015904

Check whether the h2d transfers of the two streams overlap


In [14]:
# Look up the h2d start/end pair of each stream first, then report both.
prev_stm_h2ds_start, prev_stm_h2ds_end = find_h2ds_timing(df_cke_list[0])
curr_stm_h2ds_start, curr_stm_h2ds_end = find_h2ds_timing(df_cke_list[1])

print("prev stream h2ds : {} - {}".format(prev_stm_h2ds_start, prev_stm_h2ds_end))
print("curr stream h2ds : {} - {}".format(curr_stm_h2ds_start, curr_stm_h2ds_end))


prev stream h2ds : 0.0 - 0.0341120000001
curr stream h2ds : 0.0341120000001 - 0.0682240000001

In [15]:
# The current stream's h2d phase overlaps the previous stream's when it
# starts inside the half-open window [prev start, prev end).
h2ds_ovlp_between_stream = bool(
    prev_stm_h2ds_start <= curr_stm_h2ds_start < prev_stm_h2ds_end
)

print("h2ds_ovlp_between_stream : {}".format(h2ds_ovlp_between_stream))


h2ds_ovlp_between_stream : False

Check whether the kernels of the two streams overlap


In [16]:
# Look up the kernel start/end pair of each stream first, then report both.
prev_stm_kern_start, prev_stm_kern_end = find_kern_timing(df_cke_list[0])
curr_stm_kern_start, curr_stm_kern_end = find_kern_timing(df_cke_list[1])

print("prev stream kern : {} - {}".format(prev_stm_kern_start, prev_stm_kern_end))
print("curr stream kern : {} - {}".format(curr_stm_kern_start, curr_stm_kern_end))


prev stream kern : 0.196001 - 0.257057
curr stream kern : 0.230113 - 0.291169

In [17]:
# Kernels overlap when the current stream's kernel launches at or after the
# previous kernel's start but strictly before the previous kernel's end.
starts_after_prev_begins = curr_stm_kern_start >= prev_stm_kern_start
starts_before_prev_ends = curr_stm_kern_start < prev_stm_kern_end
kern_ovlp_between_stream = bool(starts_after_prev_begins and starts_before_prev_ends)

print("kern_ovlp_between_stream : {}".format(kern_ovlp_between_stream))


kern_ovlp_between_stream : True

Use the cke model if kern_ovlp_between_stream is True


In [18]:
# Get the overlapping kernel info from both streams.
# NOTE(review): the trailing comma in the call suggests more arguments may
# have been intended — confirm against model_cke_from_same_kernel's signature.

kernel_ = model_cke_from_same_kernel(gtx950, df_trace, )