In [1]:
%load_ext autoreload
%autoreload 2
import warnings
import pandas as pd
import numpy as np
import os
import sys # error msg, add the modules
import operator # sorting
from math import *
import matplotlib.pyplot as plt
# Add the directory two levels above the working directory to the module search
# path; the project-local modules imported below are presumably found there.
sys.path.append('../../')
import cuda_timeline
import read_trace
import avgblk
import cke
# NOTE(review): star imports hide where names come from. DeviceInfo, init_gpu and
# Copy_kern_info used later in this notebook presumably come from one of these two
# modules -- confirm and consider importing them explicitly.
from model_param import *
from df_util import *
# Suppress numpy's VisibleDeprecationWarning for the rest of the session.
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)
In [2]:
# Device description handed to the model (the variable name refers to a GTX 950).
gtx950 = DeviceInfo()
gtx950.sm_num = 6                       # number of SMs
gtx950.sharedmem_per_sm = 48 * 1024     # 49152 (presumably bytes -- confirm)
gtx950.reg_per_sm = 64 * 1024           # 65536 registers per SM
gtx950.maxthreads_per_sm = 2 * 1024     # 2048 threads per SM
In [3]:
# Initialise the SM resources for the device described by gtx950.
# init_gpu returns two values, kept here as SM_resList and SM_traceList; both are
# indexed per entry in the next cells and later passed to avgblk.cke_model.
SM_resList, SM_traceList = init_gpu(gtx950)
In [4]:
# Display the first entry of SM_resList to inspect the state returned by init_gpu.
SM_resList[0]
Out[4]:
In [5]:
# Display the first entry of SM_traceList to inspect the state returned by init_gpu.
SM_traceList[0]
Out[5]:
In [6]:
# Single-stream trace file (path is relative to the notebook's working directory).
trace_file = '1stream_23000.csv'
# Read the trace into a dataframe.
df_trace = read_trace.Trace2dataframe(trace_file)
In [7]:
#df_trace
In [8]:
# Extract the kernel info from the single-stream trace.
# Warning: currently limited to one kernel.
kernel = read_trace.GetKernelInfo(df_trace, gtx950)
# Show the extracted fields. The print() call form (already used elsewhere in this
# notebook) prints the same text on Python 2 and also parses on Python 3.
print(kernel.runtime_ms)
print(kernel.avg_blk_time)
print(kernel.blockDim)
print(kernel.start_ms)
In [9]:
# For each stream, keep a dict of its kernels:
#   key   -> kernel order within the stream
#   value -> kernel info
# Each stream receives the result of Copy_kern_info(kernel) as its kernel 0.
stream_kernel_list = []
stream_num = 2
for sid in range(stream_num):
    print(sid)
    kern_dd = {0: Copy_kern_info(kernel)}
    stream_kernel_list.append(kern_dd)
In [10]:
# Check the kernel info stored for the first stream (index 0), kernel 0.
s1_kern_dd = stream_kernel_list[0]
print(s1_kern_dd[0].runtime_ms)
print(s1_kern_dd[0].avg_blk_time)
print(s1_kern_dd[0].blockDim)
print(s1_kern_dd[0].start_ms)
In [11]:
# Check how many streams were registered, then the kernel info stored for the
# second stream (index 1), kernel 0.
print(len(stream_kernel_list))
print(stream_kernel_list[1][0].runtime_ms)
print(stream_kernel_list[1][0].avg_blk_time)
print(stream_kernel_list[1][0].blockDim)
print(stream_kernel_list[1][0].start_ms)
In [12]:
# Two-stream trace file (path is relative to the notebook's working directory),
# read with the same helper as the single-stream trace.
trace_file_2cke = '2stream_kern_ovlp_23000.csv'
df_trace_2cke = read_trace.Trace2dataframe(trace_file_2cke)
In [13]:
# Display the two-stream trace as read from the CSV.
df_trace_2cke
Out[13]:
In [14]:
# Plot the two-stream trace with the project's cuda_timeline.plot_trace helper.
cuda_timeline.plot_trace(df_trace_2cke)
In [15]:
# Timing table derived from the single-stream trace, and its total runtime.
df_single_stream = read_trace.Get_timing_from_trace(df_trace)
# print(df_single_stream)
tot_runtime = read_trace.GetTotalRuntime(df_single_stream)
print(tot_runtime)
In [16]:
# Timing table derived from the two-stream trace, and its total runtime.
# Note: tot_runtime is reassigned here, replacing the single-stream value.
df_2stream = read_trace.Get_timing_from_trace(df_trace_2cke)
print(df_2stream)
tot_runtime = read_trace.GetTotalRuntime(df_2stream)
print(tot_runtime)
In [17]:
# Apply read_trace.Reset_starting to the single-stream timing table and show it.
df_s1 = read_trace.Reset_starting(df_single_stream)
print(df_s1)
In [18]:
# Apply read_trace.Reset_starting to the two-stream timing table and show it.
df_2stream_trace = read_trace.Reset_starting(df_2stream)
print(df_2stream_trace)
In [19]:
# Number of streams to model.
stream_num = 2
# Find when to start each stream and update the starting position of its trace.
# The threshold below is forwarded to cke.init_trace_list as h2d_ovlp_th.
# NOTE(review): the origin and units of 3.158431 are not visible here -- confirm.
H2D_H2D_OVLP_TH = 3.158431
df_cke_list = cke.init_trace_list(
    df_s1,
    stream_num=stream_num,
    h2d_ovlp_th=H2D_H2D_OVLP_TH
)
In [20]:
# df_cke_list[0]
In [21]:
# df_cke_list[1]
In [22]:
# Build the combined API table from the per-stream list df_cke_list. The modelling
# loop further down reads and updates this table (e.g. its 'pred_end' column).
df_all_api = cke.init_sort_api_with_extra_cols(df_cke_list)
In [23]:
# Display the combined API table before the modelling loop runs.
df_all_api
Out[23]:
In [24]:
# Model the API calls of all streams until cke.AllDone(df_all_api) is true.
# Each iteration picks two rows (r1, r2) of df_all_api, predicts their end times
# according to how they interact (no conflict / same-direction transfer /
# concurrent kernels) and then updates the state of the streams.
#
# Reads : df_all_api, stream_kernel_list, gtx950, SM_resList, SM_traceList
# Writes: df_all_api, SM_resList, SM_traceList, start_ms of the modelled kernels
count = 0        # number of iterations executed so far (debug aid)
break_count = 7  # debug aid only: insert `if count == break_count: break` at the
                 # point of interest to stop the loop early; unused otherwise
while not cke.AllDone(df_all_api):
    count = count + 1

    #-----------------------
    # pick two api to model
    #-----------------------
    df_all_api, r1, r2 = cke.PickTwo(df_all_api)

    #-----------------------
    # nothing left to pair: go directly to updating the last wake api and stop
    #-----------------------
    if r1 is None and r2 is None:
        df_all_api = cke.UpdateStream_lastapi(df_all_api)
        break

    #-----------------------
    # move the current_pos to the starting of coming api r2, and update r1 status
    #-----------------------
    df_all_api = cke.StartNext_byType(df_all_api, [r1, r2])

    #-----------------------------
    # if one call is done, continue the next round
    #-----------------------------
    if cke.CheckRowDone(df_all_api, r1, r2):
        continue

    #-----------------------------
    # both calls are active: predict according to the kind of overlap
    #-----------------------------
    whichType = cke.CheckType(df_all_api, r1, r2)  # check whether the same api
    if whichType is None:
        df_all_api = cke.Predict_noConflict(df_all_api, r1, r2)
    elif whichType in ['h2d', 'd2h']:  # data transfer in the same direction
        df_all_api = cke.Predict_transferOvlp(df_all_api, r1, r2, ways = 2.0)
    else:  # concurrent kernels
        print('run cke model')

        r1_sid, r1_kid = cke.FindStreamAndKernID(df_all_api, r1)
        r2_sid, r2_kid = cke.FindStreamAndKernID(df_all_api, r2)
        r1_start_ms = cke.GetStartTime(df_all_api, r1)
        r2_start_ms = cke.GetStartTime(df_all_api, r2)

        # Look the kernels up by their stream / kernel id and write the start
        # times to those same objects, so the kernels handed to the model always
        # carry their own start time (instead of fixed positions [0][0]/[1][0]).
        r1_kern = stream_kernel_list[r1_sid][r1_kid]
        r2_kern = stream_kernel_list[r2_sid][r2_kid]
        r1_kern.start_ms = r1_start_ms
        r2_kern.start_ms = r2_start_ms
        kernels_ = [r1_kern, r2_kern]

        SM_resList, SM_traceList = avgblk.cke_model(gtx950, SM_resList, SM_traceList, kernels_)

        # find the kernel execution time from the sm trace table
        # r1 will be the 1st in dd, r2 will be the 2nd; element [1] is the end time
        result_kernel_runtime_dd = avgblk.Get_KernTime(SM_traceList)
        result_r1_end = result_kernel_runtime_dd[0][1]
        result_r2_end = result_kernel_runtime_dd[1][1]

        # Warning: it is better to have a pred_start,
        # but we care about the end timing for now.
        # .at replaces DataFrame.set_value, which newer pandas no longer provides.
        df_all_api.at[r1, 'pred_end'] = result_r1_end
        df_all_api.at[r2, 'pred_end'] = result_r2_end

    # check any of r1 and r2 has status done. if done, go to next
    rangeT = cke.Get_pred_range(df_all_api)
    print(rangeT)

    # check whether there is conc during the rangeT
    extra_conc = cke.Check_cc_by_time(df_all_api, rangeT)
    print('extra_conc {}'.format(extra_conc))

    if extra_conc == 0:
        if whichType in ['h2d', 'd2h']:
            df_all_api = cke.Update_wake_transferOvlp(df_all_api, rangeT, ways = 2.0)
        elif whichType == 'kern':
            df_all_api = cke.Update_wake_kernOvlp(df_all_api)
        else:  # no overlapping
            df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)

        # check if any api is done, and update the timing for the other apis in that stream
        df_all_api = cke.UpdateStreamTime(df_all_api)
    else:
        # todo : when there is additional overlapping
        # NOTE(review): nothing is updated on this path; if it is ever taken the
        # loop relies on later iterations to make progress -- confirm it terminates.
        pass
In [25]:
# Display the API table after the modelling loop has run.
df_all_api
Out[25]:
In [26]:
# Display the timing table of the measured two-stream trace (presumably shown here
# for comparison with the predicted values in df_all_api).
df_2stream_trace
Out[26]:
In [27]:
# Display the single-stream timing table that was used as the input of the model.
df_s1
Out[27]:
In [28]:
#
# run above
#