scheme:
In [1]:
%load_ext autoreload
%autoreload 2
import warnings
import pandas as pd
import numpy as np
import os
import sys # error msg, add the modules
import operator # sorting
from math import *
import matplotlib.pyplot as plt
sys.path.append('../../')
import cuda_timeline
import read_trace
import avgblk
import cke
from model_param import *
#from df_util import *
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)
In [2]:
# Describe the target GPU: build an empty DeviceInfo and fill in its fields
# one by one, in the same order as listed in the table below.
gtx950 = DeviceInfo()
for _field, _value in (
    ('sm_num', 6),
    ('sharedmem_per_sm', 49152),
    ('reg_per_sm', 65536),
    ('maxthreads_per_sm', 2048),
):
    setattr(gtx950, _field, _value)
In [3]:
# init SM resources: init_gpu() takes the device description and returns two
# lists, kept here as SM_resList and SM_traceList.
SM_resList, SM_traceList = init_gpu(gtx950)
In [4]:
#SM_resList[0]
In [5]:
# Display the first entry of the trace list returned by init_gpu().
SM_traceList[0]
Out[5]:
In [6]:
# Paths of the two trace CSV files used in this notebook.
trace_s1, trace_s3 = 'trace_s1_5m.csv', 'trace_s3_5m.csv'

# Parse each trace file with read_trace.Trace2dataframe.
df_trace_s1 = read_trace.Trace2dataframe(trace_s1)
df_trace_s3 = read_trace.Trace2dataframe(trace_s3)
In [7]:
#df_trace_s1
In [8]:
# Plot the trace loaded from trace_s1.
cuda_timeline.plot_trace(df_trace_s1)
In [9]:
#cuda_timeline.plot_trace(df_trace_s2)
In [10]:
# Plot the trace loaded from trace_s3.
cuda_timeline.plot_trace(df_trace_s3)
In [11]:
# Extract kernel info from the trace_s1 dataframe for the gtx950 device.
# Warning: currently limited to one kernel.
kernel = read_trace.GetKernelInfo(df_trace_s1, gtx950)
# Print the extracted kernel info for inspection.
Dump_kernel_info(kernel)
In [12]:
# Build one dictionary per stream. Each dictionary maps the kernel order
# (key) to that kernel's info (value); every stream gets its own copy of the
# single kernel extracted above, stored under order 0.
stream_num = 3
stream_kernel_list = []
for sid in range(stream_num):
    kern_dd = {0: Copy_kernel_info(kernel)}
    stream_kernel_list.append(kern_dd)

# Sanity check: dump the kernel stored for the first stream.
Dump_kernel_info(stream_kernel_list[0][0])
In [13]:
# Pull the timing information out of the trace_s1 dataframe ...
df_s1_trace_timing = read_trace.Get_timing_from_trace(df_trace_s1)
# ... and pass it through Reset_starting (presumably re-bases the start
# times -- TODO confirm against read_trace).
df_s1 = read_trace.Reset_starting(df_s1_trace_timing)
In [14]:
# Display the result of Reset_starting for inspection.
df_s1
Out[14]:
In [15]:
# find when to start the stream and update the starting pos for the trace
# NOTE(review): the origin and unit of this threshold are not documented
# here -- presumably the same time unit as the trace; confirm.
H2D_H2D_OVLP_TH = 3.158431
# Build one trace per stream from df_s1, passing the threshold as h2d_ovlp_th.
df_cke_list = cke.init_trace_list(df_s1, stream_num = stream_num, h2d_ovlp_th = H2D_H2D_OVLP_TH)
In [16]:
# Display the entry at index 0 of df_cke_list.
df_cke_list[0]
Out[16]:
In [17]:
# Display the entry at index 1 of df_cke_list.
df_cke_list[1]
Out[17]:
In [18]:
# Display the entry at index 2 of df_cke_list.
df_cke_list[2]
Out[18]:
In [19]:
# Combine the per-stream traces into a single table of API calls; judging by
# the helper's name it also sorts them and adds extra bookkeeping columns
# (TODO confirm against cke.init_sort_api_with_extra_cols).
df_all_api = cke.init_sort_api_with_extra_cols(df_cke_list)
In [20]:
# Display the combined API-call table before the scheduling loop runs.
df_all_api
Out[20]:
In [21]:
# Walk through the API calls that follow the base call and update df_all_api
# as each one is started.
# NOTE(review): the `if count == N: break` guards look like temporary debugging
# stops that end the walk early -- confirm before relying on the final table.
# All print calls use the single-argument print(...) form and comparisons use
# `!=`, so the cell parses on both Python 2 and Python 3.

# stream_id list
stream_list = [float(x) for x in range(stream_num)]

# pick the 1st sleep api and wake it
df_all_api, r1, r1_stream = cke.pick_base_call(df_all_api)
df_all_api = SetWake(df_all_api, r1)
print('row {}, stream-id {}'.format(r1, r1_stream))

# select coming (stream_num - 1) api calls, they are potential concurrency api calls
cc_rows = FindComingCalls(df_all_api, r1, stream_num)

count = 1  # loop-round counter (starts at 1); drives the debug breaks below
active_stream_pool = [r1_stream]  # pool of active stream ids, seeded with the base call's stream

for r2 in cc_rows:
    print('active_stream_pool : {}'.format(active_stream_pool))

    # wake target row
    df_all_api = SetWake(df_all_api, r2)
    r2_stream = GetStreamID(df_all_api, r2)
    print('row {}, stream-id {}'.format(r2, r2_stream))

    if count == 2: break

    # check whether current stream is in the active stream pool
    # where I mean they are from the same stream
    if r2_stream in active_stream_pool:
        # finish previous call
        df_all_api = cke.finish_call(df_all_api, r1)
        # update the time for all the calls in this stream
        df_all_api = cke.UpdateStreamTime(df_all_api)
        # adjust the r2 current pos and pred_end to its own start / end
        df_all_api = UpdateCell(df_all_api, r2, 'current_pos', GetInfo(df_all_api, r2, 'start'))
        df_all_api = UpdateCell(df_all_api, r2, 'pred_end', GetInfo(df_all_api, r2, 'end'))
        break

    # if it is a new stream, add to the pool
    active_stream_pool.append(r2_stream)

    #---------------------------------------
    # move the current pos to the r2 start (to do)
    #---------------------------------------
    # rangeT = cke.Get_pred_range(df_all_api)
    # print rangeT
    # # check whether there is conc during the rangeT
    # cke.Check_ovlp(df_all_api, r1, r2)

    # when they are from different streams
    #-----------------------
    # move the current_pos to the starting of coming api r2, and update r1 status
    #-----------------------
    #df_all_api = cke.StartNext_byType(df_all_api, [r1, r2])
    df_all_api = cke.MoveCurPos(df_all_api, r2)  # start r2, check concurrency, and update the timing

    #-----------------------------
    # if one call is done, continue the next round
    #-----------------------------
    rowDone, rowDone_list = cke.CheckRowDone(df_all_api, [r1, r2])
    if rowDone:
        print('soem call is ended, go for next call')
        print(rowDone_list)
        for each in rowDone_list:  # throw away the done stream
            active_stream_pool = [x for x in active_stream_pool if x != each]
        print(active_stream_pool)
        count += 1
        continue

    whichType = cke.CheckType(df_all_api, r1, r2)  # check whether the same api
    print(whichType)
    if whichType is None:
        # run noconflict
        pass
    elif whichType in ['h2d', 'd2h']:  # data transfer in the same direction
        df_all_api = cke.Predict_transferOvlp(df_all_api, [r1, r2])
        if count == 1: break
    else:
        # concurrent kernel: todo
        pass

    #--------------------
    # check if any api is done, and update the timing for the other apis in that stream
    #--------------------
    df_all_api = cke.UpdateStreamTime(df_all_api)
    count += 1

## after checking the coming call for ovlp
In [22]:
# Display the API-call table after the scheduling loop has run.
df_all_api
Out[22]:
In [23]:
#
# run above
#