scheme:
In [498]:
%load_ext autoreload
%autoreload 2
import warnings
import pandas as pd
import numpy as np
import os
import sys # error msg, add the modules
import operator # sorting
from math import *
import matplotlib.pyplot as plt
sys.path.append('../../')
import cuda_timeline
import read_trace
import avgblk
import cke
from model_param import *
#from df_util import *
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)
In [499]:
# Device description for the modelled GPU (variable name suggests a GTX 950).
# The attributes are set one by one, in a fixed order, on a fresh DeviceInfo.
gtx950 = DeviceInfo()
for _attr, _value in (
        ('sm_num', 6),
        ('sharedmem_per_sm', 49152),
        ('reg_per_sm', 65536),
        ('maxthreads_per_sm', 2048)):
    setattr(gtx950, _attr, _value)
In [500]:
# init SM resources
# init_gpu takes the device description and returns two lists: SM_resList and
# SM_traceList. Both are later passed to avgblk.cke_model, which returns
# updated versions of them.
SM_resList, SM_traceList = init_gpu(gtx950)
In [501]:
#SM_resList[0]
In [502]:
# Show the first entry of the SM trace list right after initialisation.
SM_traceList[0]
Out[502]:
In [503]:
# Trace CSV files to load (names suggest one file per stream configuration
# -- TODO confirm what s1/s2/s3 stand for).
trace_s1, trace_s2, trace_s3 = (
    'trace_s1_5m.csv',
    'trace_s2_5m.csv',
    'trace_s3_5m.csv',
)

# Convert each CSV into a dataframe, in the same s1, s2, s3 order.
df_trace_s1, df_trace_s2, df_trace_s3 = map(
    read_trace.Trace2dataframe, (trace_s1, trace_s2, trace_s3))
In [504]:
# Display the dataframe built from trace_s1_5m.csv.
df_trace_s1
Out[504]:
In [505]:
# Plot the trace loaded from trace_s1_5m.csv with cuda_timeline.plot_trace.
cuda_timeline.plot_trace(df_trace_s1)
In [506]:
# Plot the trace loaded from trace_s2_5m.csv with cuda_timeline.plot_trace.
cuda_timeline.plot_trace(df_trace_s2)
In [507]:
# Plot the trace loaded from trace_s3_5m.csv with cuda_timeline.plot_trace.
cuda_timeline.plot_trace(df_trace_s3)
In [508]:
# extract kernel info from trace
# warning: currently limited to one kernel
# The kernel description is built from the s1 trace together with the device
# description, then dumped for inspection.
kernel = read_trace.GetKernelInfo(df_trace_s1, gtx950)
Dump_kernel_info(kernel)
In [509]:
# One dict per stream: key is the kernel order, value is the kernel info.
stream_num = 3
stream_kernel_list = []
for sid in range(stream_num):
    # each stream gets its own Copy_kernel_info(kernel) result at order 0
    kern_dd = {0: Copy_kernel_info(kernel)}
    stream_kernel_list += [kern_dd]

# Dump the kernel stored for the first stream, order 0.
Dump_kernel_info(stream_kernel_list[0][0])
In [510]:
# Derive the timing table from the s1 trace, then pass it through
# Reset_starting (presumably rebases the start times -- TODO confirm in
# read_trace). df_s1 is the input to cke.init_trace_list below.
df_s1_trace_timing = read_trace.Get_timing_from_trace(df_trace_s1)
df_s1 = read_trace.Reset_starting(df_s1_trace_timing)
In [511]:
# Display the result of Reset_starting on the s1 timing table.
df_s1
Out[511]:
In [512]:
# find when to start the stream and update the starting pos for the trace
# H2D_H2D_OVLP_TH is forwarded as the h2d_ovlp_th argument.
# NOTE(review): the origin and unit of 3.158431 are not shown in this
# notebook -- document where the value comes from.
H2D_H2D_OVLP_TH = 3.158431
df_cke_list = cke.init_trace_list(df_s1, stream_num = stream_num, h2d_ovlp_th = H2D_H2D_OVLP_TH)
In [513]:
# Display the first entry returned by cke.init_trace_list.
df_cke_list[0]
Out[513]:
In [514]:
# Display the second entry returned by cke.init_trace_list.
df_cke_list[1]
Out[514]:
In [515]:
# Display the third entry returned by cke.init_trace_list.
df_cke_list[2]
Out[515]:
In [516]:
# Combine the per-stream list into the single table df_all_api that the
# modelling cells below read and update (judging by the function name, it is
# sorted and given extra columns -- TODO confirm in cke).
df_all_api = cke.init_sort_api_with_extra_cols(df_cke_list)
In [517]:
# Display the combined api table before any modelling step has run.
df_all_api
Out[517]:
In [518]:
# Seed the model: wake the first sleeping api call (r1), then walk the other
# streams looking for a call (r2) to pair it with.
# Written to run on both Python 2 and Python 3: `!=` replaces the removed `<>`
# operator and print is called as a function.

# stream_id list
stream_list = [float(x) for x in range(stream_num)]

# pick the 1st sleep api, wake it, and initialise its current_pos / pred_end
# from the row's own start / end
df_all_api, r1, r1_stream = cke.pick_first_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r1)
df_all_api = UpdateCell(df_all_api, r1, 'current_pos', get_rowinfo(df_all_api, r1)['start'])
df_all_api = UpdateCell(df_all_api, r1, 'pred_end', get_rowinfo(df_all_api, r1)['end'])
print('row {}, stream-id {}'.format(r1, r1_stream))

stream_queue = [r1_stream]

# concurrency level; handed to Predict_transferOvlp as `ways`
cc = 1.0

# extract api calls from other streams
df_other = df_all_api.loc[df_all_api.stream_id != r1_stream]
other_stream_ids = list(df_other.stream_id.unique())
other_stream_num = len(other_stream_ids)

for i in range(other_stream_num):
    # wake the first sleeping call among the other streams, same init as r1
    df_other, r2, r2_stream = cke.pick_first_sleep(df_other)
    print('row {}, stream-id {}'.format(r2, r2_stream))
    df_all_api = SetWake(df_all_api, r2)
    df_all_api = UpdateCell(df_all_api, r2, 'current_pos', get_rowinfo(df_all_api, r2)['start'])
    df_all_api = UpdateCell(df_all_api, r2, 'pred_end', get_rowinfo(df_all_api, r2)['end'])

    # if r1 and r2 are from the same stream, break the iteration, and finish r1
    if r1_stream == r2_stream:
        break

    # when they are not the same stream, check whether there is concurrency:
    # move the current_pos to the starting of coming api r2, and update r1 status
    df_all_api = cke.StartNext_byType(df_all_api, [r1, r2])

    # if one call is done, continue the next round
    if cke.CheckRowDone(df_all_api, [r1, r2]):
        continue

    whichType = cke.CheckType(df_all_api, r1, r2)  # check whether the same api
    print(whichType)

    if whichType is None:
        # run noconflict: todo
        pass
    elif whichType in ['h2d', 'd2h']:  # data transfer in the same direction
        cc = cc + 1
        df_all_api = cke.Predict_transferOvlp(df_all_api, [r1, r2], ways = cc)
        break
    else:
        # concurrent kernel: todo
        pass

    # NOTE(review): this unconditional break stops the scan after the first
    # pair that is not already done. Its nesting level was ambiguous in the
    # exported notebook (it may belong to the `else` branch above) -- confirm
    # against the original cell.
    break
# other_stream_list = cke.find_unique_streams(df_other)
# find the 1st sleep api that is other stream
# if there is overlapping, we start ovlp mode, if not finish r1, start current
# go through each
# rest_stream_list = [x for x in stream_list if x <> r1_stream]
# print rest_stream_list
# for sid in rest_stream_list:
# df_stream = df_all_api.loc[df_all_api.stream_id == sid]
In [519]:
# Display the api table after the seeding step above.
df_all_api
Out[519]:
In [ ]:
#
#
# run above
In [24]:
# Main modelling loop: repeatedly pick two api calls, predict their end times
# according to how they interact (no conflict / same-direction transfer /
# concurrent kernels), then update the wake calls and the stream timing.
#
# NOTE(review): this cell's execution count (In [24]) is lower than the cells
# above it, so it was last run in an earlier session; its cke calls may not
# match the current cke signatures (see notes below).

count = 0
break_count = 7  # debugging aid: round at which to stop early (hook below)

while not cke.AllDone(df_all_api):
    count = count + 1
    # if count == break_count: break   # uncomment to stop after break_count rounds

    # pick two api to model
    df_all_api, r1, r2 = cke.PickTwo(df_all_api)

    # nothing left to pair: go directly to updating the last wake api
    if r1 is None and r2 is None:
        df_all_api = cke.UpdateStream_lastapi(df_all_api)
        break

    # move the current_pos to the starting of coming api r2, and update r1 status
    df_all_api = cke.StartNext_byType(df_all_api, [r1, r2])

    # if one call is done, continue the next round
    # NOTE(review): the seeding cell calls CheckRowDone(df, [r1, r2]) with a
    # list; here the rows are passed separately -- confirm which form the
    # current cke.CheckRowDone expects.
    if cke.CheckRowDone(df_all_api, r1, r2):
        continue

    # both calls are active: predict according to the interaction type
    whichType = cke.CheckType(df_all_api, r1, r2)  # check whether the same api
    if whichType is None:
        df_all_api = cke.Predict_noConflict(df_all_api, r1, r2)
    elif whichType in ['h2d', 'd2h']:  # data transfer in the same direction
        # NOTE(review): the seeding cell passes [r1, r2] as one list argument
        # to Predict_transferOvlp -- confirm the expected call shape.
        df_all_api = cke.Predict_transferOvlp(df_all_api, r1, r2, ways = 2.0)
    else:  # concurrent kernel: todo
        print('run cke model')

        r1_sid, r1_kid = cke.FindStreamAndKernID(df_all_api, r1)
        r2_sid, r2_kid = cke.FindStreamAndKernID(df_all_api, r2)

        r1_start_ms = cke.GetStartTime(df_all_api, r1)
        r2_start_ms = cke.GetStartTime(df_all_api, r2)

        # Set the start time on the kernels that are actually being modelled.
        # (Previously the start times were always written to
        # stream_kernel_list[0][0] and [1][0], regardless of which streams
        # r1 and r2 belong to.)
        r1_kernel = stream_kernel_list[r1_sid][r1_kid]
        r2_kernel = stream_kernel_list[r2_sid][r2_kid]
        r1_kernel.start_ms = r1_start_ms
        r2_kernel.start_ms = r2_start_ms

        kernels_ = [r1_kernel, r2_kernel]
        SM_resList, SM_traceList = avgblk.cke_model(gtx950, SM_resList, SM_traceList, kernels_)

        # find the kernel execution time from the sm trace table
        # r1 will be the 1st in dd, r2 will be the 2nd; each entry is read as
        # [start, end]
        result_kernel_runtime_dd = avgblk.Get_KernTime(SM_traceList)
        result_r1_start = result_kernel_runtime_dd[0][0]
        result_r1_end = result_kernel_runtime_dd[0][1]
        result_r2_start = result_kernel_runtime_dd[1][0]
        result_r2_end = result_kernel_runtime_dd[1][1]

        # `.at` replaces DataFrame.set_value, which newer pandas no longer has.
        # Warning: it is better to have a pred_start,
        # but we care about the end timing for now
        df_all_api.at[r1, 'pred_end'] = result_r1_end
        df_all_api.at[r2, 'pred_end'] = result_r2_end

    # check any of r1 and r2 has status done. if done, go to next
    rangeT = cke.Get_pred_range(df_all_api)
    print(rangeT)

    # check whether there is conc during the rangeT
    extra_conc = cke.Check_cc_by_time(df_all_api, rangeT)
    print('extra_conc {}'.format(extra_conc))

    if extra_conc == 0:
        if whichType in ['h2d', 'd2h']:
            df_all_api = cke.Update_wake_transferOvlp(df_all_api, rangeT, ways = 2.0)
        elif whichType == 'kern':
            df_all_api = cke.Update_wake_kernOvlp(df_all_api)
        else:  # no overlapping
            df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)

        # check if any api is done, and update the timing for the other apis in that stream
        df_all_api = cke.UpdateStreamTime(df_all_api)
    else:
        # todo : when there is additional overlapping
        # NOTE(review): nothing is updated on this path; if the table state
        # does not otherwise change, the while loop may never terminate.
        pass
In [25]:
# Display the api table after the modelling loop.
df_all_api
Out[25]:
In [26]:
# NOTE(review): df_2stream_trace is not assigned in any cell visible in this
# notebook; on a fresh kernel this line would raise NameError. Define it
# (or load the corresponding trace) before this cell, or drop the cell.
df_2stream_trace
Out[26]:
In [27]:
# Display df_s1 again for comparison with the tables above.
df_s1
Out[27]:
In [28]:
#
# run above
#